Intro

In this script, I load exchange data from DATRAS and calculate the catch of cod and flounder in units of kg/km^2 (with TVL gear) by size group, correcting for gear dimensions, sweep length and trawl speed, following Orio et al. (2017).

Load libraries

library(tidyverse)
#> Warning: package 'tidyr' was built under R version 4.0.5
library(readxl)
library(tidylog)
library(RCurl)
library(viridis)
library(RColorBrewer)
#> Warning: package 'RColorBrewer' was built under R version 4.0.5
library(patchwork)
library(janitor)
library(icesDatras)
library(mapdata)
library(patchwork)
library(rgdal)
library(raster)
library(sf)
#> Warning: package 'sf' was built under R version 4.0.5
library(rgeos)
library(chron)
library(lattice)
library(ncdf4)
library(marmap)
library(rnaturalearth)
library(rnaturalearthdata)
library(mapplots)
library(geosphere)
library(modelr)
library(devtools)

# Medium-resolution country polygons, returned as an sf object
world <- ne_countries(returnclass = "sf", scale = "medium")

# Remote helper scripts: map plotting code and lon/lat -> UTM conversion
helper_urls <- c(
  "https://raw.githubusercontent.com/maxlindmark/spatial-metabolic-index/main/R/functions/map-plot.R",
  "https://raw.githubusercontent.com/maxlindmark/spatial-metabolic-index/main/R/functions/lon-lat-utm.R"
)
for (helper_url in helper_urls) {
  source_url(helper_url)
}

# theme_plot() is not defined in this file; presumably it comes from map-plot.R
theme_set(theme_plot())

Read data

# The exchange data were downloaded with getDATRAS on 2022.09.06 and cached as
# csv files; the download calls are kept (commented out) for reference.
# Only years after 1992 are kept, to match the covariates.

# Haul information (HH)
# bits_hh <- getDATRAS(record = "HH", survey = "BITS", years = 1991:2020, quarters = c(1, 4))
# write.csv(bits_hh, "data/DATRAS_exchange/bits_hh.csv")
bits_hh <- "data/DATRAS_exchange/bits_hh.csv" %>%
  read.csv() %>%
  filter(Year > 1992)

# Length-based catch information (HL)
# bits_hl <- getDATRAS(record = "HL", survey = "BITS", years = 1991:2020, quarters = c(1, 4))
# write.csv(bits_hl, "data/DATRAS_exchange/bits_hl.csv")
bits_hl <- "data/DATRAS_exchange/bits_hl.csv" %>%
  read.csv() %>%
  filter(Year > 1992)

# Age-based / individual information (CA)
# bits_ca <- getDATRAS(record = "CA", survey = "BITS", years = 1991:2020, quarters = c(1, 4))
# write.csv(bits_ca, "data/DATRAS_exchange/bits_ca.csv")
bits_ca <- "data/DATRAS_exchange/bits_ca.csv" %>%
  read.csv() %>%
  filter(Year > 1992)

# Read gear standardization data.
# Only sweep_9118_ml.csv is used below. The older sweep_9116.csv used to be read
# into `sweep` first and was then immediately overwritten, so that read had no
# effect; it is kept here commented out for reference.
# sweep <- read.csv("data/gear_standardization/sweep_9116.csv", sep = ";", dec = ",", fileEncoding = "latin1")
sweep <- read.csv("data/gear_standardization/sweep_9118_ml.csv", sep = ";", fileEncoding = "latin1")

Standardize catch data

Standardize ships

# Before creating a new ID, make sure that country and ship names use the same format.
# List the distinct ship codes in the sweep file and in the HH and HL exchange data.
sweep$Ship %>% unique() %>% sort()
#>  [1] "26HF" "ATL"  "ATLD" "BAL"  "BALL" "BPE"  "CEV"  "CLP"  "CLV"  "COML"
#> [11] "DAN2" "DANS" "DAR"  "GDY"  "HAF"  "KOH"  "KOOT" "MON"  "MONL" "SOL" 
#> [21] "SOL2" "VSH"  "ZBA"
bits_hh$Ship %>% unique() %>% sort()
#>  [1] "06JR" "06S1" "06SL" "26D4" "26HF" "26HI" "67BC" "77AR" "77MA" "77SE"
#> [11] "AA36" "ESLF" "ESOR" "ESTM" "LAIZ" "LTDA" "RUEK" "RUJB" "RUNT" "RUS6"
bits_hl$Ship %>% unique() %>% sort()
#>  [1] "06JR" "06S1" "06SL" "26D4" "26HF" "26HI" "67BC" "77AR" "77MA" "77SE"
#> [11] "AA36" "ESLF" "ESOR" "ESTM" "LAIZ" "LTDA" "RUEK" "RUJB" "RUNT" "RUS6"

# Change back to the old Ship name standard...
# https://vocab.ices.dk/?ref=315
# https://vocab.ices.dk/?ref=315
# Assumptions:
# SOL is Solea on ICES links above, and SOL1 is the older one of the two SOLs (1 and 2)
# DAN is Dana
# sweep %>% filter(Ship == "DANS") %>% distinct(Year, Country)
# sweep %>% filter(Ship == "DAN2") %>% distinct(Year)
# bits_hh %>% filter(Ship == "67BC") %>% distinct(Year, Country)
# sweep %>% filter(Ship == "DAN2") %>% distinct(Year)
# bits_hh %>% filter(Ship == "26D4") %>% distinct(Year) # Strange that 26DF doesn't extend far back. Which ship did the Danes use? Ok, I have no Danish data that old.
# bits_hh %>% filter(Country == "DK") %>% distinct(Year)

# Translate the ship codes used in the exchange data to the (older) ship names
# used in the sweep file, so that hauls can be matched between the two sources.
#
# Adds two columns to `exchange`:
#   Ship2: Ship recoded to the sweep-file names; codes that are not listed below
#          are kept unchanged.
#   Ship3: as Ship2, except that "BAL" becomes "BALL" for hauls with country
#          code "LV". This must therefore run while Country still holds the
#          two-letter codes, i.e. before the country recoding further down.
#
# NOTE(review): "67BC" used to be listed twice ("HAF" = "67BC" and
# "BAL" = "67BC"). In fct_recode the later entry should take effect, so only
# "BAL" is kept here to make the mapping unambiguous -- confirm that BAL (and
# not HAF) is the intended name for 67BC.
# The self-mappings "77SE" = "77SE" and "AA36" = "AA36" were no-ops and are
# dropped.
recode_ship <- function(exchange) {
  exchange %>%
    mutate(Ship2 = fct_recode(Ship,
                              "SOL" = "06S1",
                              "SOL2" = "06SL",
                              "DAN2" = "26D4",
                              "HAF" = "26HF",
                              "HAF" = "26HI",
                              "BAL" = "67BC",
                              "ARG" = "77AR",
                              "KOOT" = "ESLF",
                              "KOH" = "ESTM",
                              "DAR" = "LTDA",
                              "ATLD" = "RUJB",
                              "ATL" = "RUNT"),
           Ship2 = as.character(Ship2)) %>%
    mutate(Ship3 = ifelse(Country == "LV" & Ship2 == "BAL", "BALL", Ship2))
}

# Apply the same recoding to all three exchange data sets
bits_hh <- recode_ship(bits_hh)
bits_hl <- recode_ship(bits_hl)
bits_ca <- recode_ship(bits_ca)

# Ok, which ships in the exchange data are missing from the sweep data?
setdiff(unique(bits_hh$Ship3), unique(sweep$Ship))
#> [1] "LAIZ" "AA36" "06JR" "ARG"  "RUEK" "RUS6" "77MA" "77SE" "ESOR"
# Swedish Ships and unidentified ships are NOT in the Sweep data

# And the reverse: which sweep ships never occur in the exchange data?
# The sweep file has no Ship3 column (its ship names are in sweep$Ship), so the
# earlier comparison on sweep$Ship3 returned NULL whatever the data contained.
setdiff(unique(sweep$Ship), unique(bits_hh$Ship3))
# NOTE(review): re-run to refresh this output. The earlier NULL result did not
# show that all sweep ships are present in the exchange data.

Standardize countries

# Compare the country codes used in the sweep file and in the exchange data
sweep$Country %>% unique() %>% sort()
#> [1] "DEN" "EST" "GFR" "LAT" "LTU" "POL" "RUS" "SWE"
bits_hh$Country %>% unique() %>% sort()
#> [1] "DE" "DK" "EE" "LT" "LV" "PL" "RU" "SE"

# Replace the two-letter country codes of the exchange data with the
# three-letter codes used in the sweep file. Codes not listed are left as they are.
# https://www.nationsonline.org/oneworld/country_code_list.htm#E
recode_country <- function(exchange) {
  exchange %>%
    mutate(
      Country = fct_recode(
        Country,
        "DEN" = "DK",
        "EST" = "EE",
        "GFR" = "DE",
        "LAT" = "LV",
        "LTU" = "LT",
        "POL" = "PL",
        "RUS" = "RU",
        "SWE" = "SE"
      ),
      Country = as.character(Country)
    )
}

# Same recoding for the HH, HL and CA data
bits_hh <- recode_country(bits_hh)
bits_hl <- recode_country(bits_hl)
bits_ca <- recode_country(bits_ca)

# Do the exchange data and the sweep file use the same gear codes?
bits_hh$Gear %>% unique() %>% sort()
#>  [1] "DT"  "ESB" "EXP" "FOT" "GOV" "GRT" "H20" "HAK" "LBT" "LPT" "P20" "PEL"
#> [13] "SON" "TVL" "TVS"
bits_hl$Gear %>% unique() %>% sort()
#>  [1] "DT"  "ESB" "EXP" "FOT" "GOV" "GRT" "H20" "HAK" "LBT" "P20" "PEL" "SON"
#> [13] "TVL" "TVS"
sweep$Gear %>% unique() %>% sort()
#>  [1] "CAM" "CHP" "DT"  "EGY" "ESB" "EXP" "GRT" "H20" "HAK" "LBT" "LPT" "P20"
#> [13] "PEL" "SON" "TVL" "TVS"

# Gears that occur in the HL data but NOT in the sweep data
setdiff(unique(bits_hl$Gear), unique(sweep$Gear))
#> [1] "GOV" "FOT"

Create a simple haul ID that works across all exchange data

# Build a haul identifier (IDx) from fields that are present in all three
# exchange data sets, pasted together with "." as separator. Ship is the
# original (not recoded) ship code here.
add_idx <- function(exchange) {
  exchange %>%
    mutate(IDx = paste(Year, Quarter, Country, Ship, Gear, StNo, HaulNo, sep = "."))
}

bits_ca <- add_idx(bits_ca)
bits_hl <- add_idx(bits_hl)
bits_hh <- add_idx(bits_hh)

# Works like a haul-id: every IDx occurs exactly once in the HH data
bits_hh %>%
  group_by(IDx) %>%
  mutate(n = n()) %>%
  ungroup() %>%
  distinct(n)
#> # A tibble: 1 × 1
#>       n
#>   <int>
#> 1     1

Create the same unique haul-ID in the cpue data that I have in the sweep-file

# Build haul.id from the same fields as IDx, but with the recoded ship name
# (Ship3) and ":" as separator. Only the HL and HH data get it here.
add_haul_id <- function(exchange) {
  exchange %>%
    mutate(haul.id = paste(Year, Quarter, Country, Ship3, Gear, StNo, HaulNo, sep = ":"))
}

bits_hl <- add_haul_id(bits_hl)
bits_hh <- add_haul_id(bits_hh)

# Check how many HH rows share a haul.id
bits_hh %>%
  group_by(haul.id) %>%
  mutate(n = n()) %>%
  ungroup() %>%
  distinct(n)
#> # A tibble: 1 × 1
#>       n
#>   <int>
#> 1     1

Clean DATRAS EXCHANGE data

# Keep only hauls whose validity code is "A", "N" or "V"
# (additional, no oxygen and valid hauls)
kept_haul_codes <- c("A", "N", "V")
bits_hh <- filter(bits_hh, HaulVal %in% kept_haul_codes)
# filter(!Country == "SWE") # I'll deal with Sweden later...

# Add the ICES rectangle, computed from the shoot position of each haul
bits_hh$Rect <- with(bits_hh, mapplots::ices.rect2(lon = ShootLong, lat = ShootLat))

# Add ICES subdivisions: overlay the shoot positions on the ICES area polygons
shape <- shapefile("data/ICES_StatRec_mapto_ICES_Areas/StatRec_map_Areas_Full_20170124.shp")

# The points are given the CRS of the shapefile, i.e. the shoot coordinates are
# taken to be in the same coordinate system as the polygons
shoot_xy <- cbind(bits_hh$ShootLong, bits_hh$ShootLat)
shape_crs <- CRS(proj4string(shape))
#> Warning in proj4string(shape): CRS object has comment, which is lost in output
pts <- SpatialPoints(shoot_xy, proj4string = shape_crs)

# One row of polygon attributes per haul; Area_27 holds the area code
haul_areas <- over(pts, shape)
bits_hh$sub_div <- haul_areas$Area_27

# Rename subdivisions to the more common names (filtering by sub div comes later)
bits_hh$sub_div %>% unique() %>% sort()
#>  [1] "3.a.20"   "3.a.21"   "3.b.23"   "3.c.22"   "3.d.24"   "3.d.25"  
#>  [7] "3.d.26"   "3.d.27"   "3.d.28.1" "3.d.28.2" "3.d.29"

# Both 3.d.28.1 and 3.d.28.2 become "28"; the result is stored as character
bits_hh <- bits_hh %>%
  mutate(
    sub_div = factor(sub_div),
    sub_div = fct_recode(
      sub_div,
      "20" = "3.a.20",
      "21" = "3.a.21",
      "22" = "3.c.22",
      "23" = "3.b.23",
      "24" = "3.d.24",
      "25" = "3.d.25",
      "26" = "3.d.26",
      "27" = "3.d.27",
      "28" = "3.d.28.1",
      "28" = "3.d.28.2",
      "29" = "3.d.29"
    ),
    sub_div = as.character(sub_div)
  )

# Now add the fishing line information from the sweep file (we need that later
# to standardize based on gear geometry). We add it to the HH data and then
# transfer it to the other exchange data files when left_joining.
# Lookup table: the distinct Gear / Fishing.line combinations in the sweep data
# (ungrouped, so no grouping leaks into later steps).
fishing_line <- sweep %>% distinct(Gear, Fishing.line)

# A gear with more than one Fishing.line value would make the join below return
# several rows per haul (and so inflate catches later on); warn if that happens.
dup_gears <- unique(fishing_line$Gear[duplicated(fishing_line$Gear)])
if (length(dup_gears) > 0) {
  warning("More than one Fishing.line per gear in the sweep data (",
          paste(dup_gears, collapse = ", "),
          "); hauls with these gears are duplicated by the join.",
          call. = FALSE)
}

# Join on Gear explicitly rather than on whatever columns happen to be shared
bits_hh <- left_join(bits_hh, fishing_line, by = "Gear")
# sweep %>% group_by(Gear) %>% distinct(Fishing.line)
# bits_hh %>% group_by(Gear) %>% distinct(Fishing.line)
bits_hh$Fishing.line <- as.numeric(bits_hh$Fishing.line)

# Which gears do not have a fishing line? Missing values are coded as -9.
bits_hh$Fishing.line[is.na(bits_hh$Fishing.line)] <- -9
bits_hh %>% filter(Fishing.line == -9) %>% distinct(Gear)
#>   Gear
#> 1  GRT
#> 2  FOT
#> 3  GOV
#> 4   DT
#> 5  LPT
#> 6  ESB
#> 7  EXP
#> 8  HAK
# 1  GRT
# 2  CAM
# 3  EXP
# 4  FOT
# 5  GOV
# 6  EGY
# 7   DT
# 8  ESB
# 9  HAK

# FROM the index files (Orio, "Research Östersjön 2")
# FOT has 83
# GOV has 160
# ESB ??
# GRT ??
# Rest are unknown and likely not used by Swedish data (therefore their correction
# factors may be in the sweep file)

# Fill in the fishing line for the two gears with known values:
# first FOT (83), then GOV (160); all other rows keep their value
bits_hh <- bits_hh %>%
  mutate(Fishing.line = ifelse(Gear == "FOT", 83, Fishing.line)) %>%
  mutate(Fishing.line = ifelse(Gear == "GOV", 160, Fishing.line))

# Keep only the HL rows that belong to a haul still present in the HH data
kept_hauls <- bits_hh$haul.id
bits_hl <- filter(bits_hl, haul.id %in% kept_hauls)

# Before matching columns from the HH data to the HL and CA data, list the
# subdivisions and the columns available in the HH data
bits_hh$sub_div %>% unique() %>% sort()
#>  [1] "20" "21" "22" "23" "24" "25" "26" "27" "28" "29"
bits_hh %>% colnames() %>% sort()
#>  [1] "BotCurDir"         "BotCurSpeed"       "BotSal"           
#>  [4] "BotTemp"           "Buoyancy"          "BySpecRecCode"    
#>  [7] "CodendMesh"        "Country"           "DataType"         
#> [10] "DateofCalculation" "Day"               "DayNight"         
#> [13] "Depth"             "DepthStratum"      "Distance"         
#> [16] "DoorSpread"        "DoorSurface"       "DoorType"         
#> [19] "DoorWgt"           "Fishing.line"      "Gear"             
#> [22] "GearEx"            "GroundSpeed"       "haul.id"          
#> [25] "HaulDur"           "HaulLat"           "HaulLong"         
#> [28] "HaulNo"            "HaulVal"           "HydroStNo"        
#> [31] "IDx"               "KiteDim"           "MaxTrawlDepth"    
#> [34] "MinTrawlDepth"     "Month"             "Netopening"       
#> [37] "PelSampType"       "Quarter"           "RecordType"       
#> [40] "Rect"              "Rigging"           "SecchiDepth"      
#> [43] "Ship"              "Ship2"             "Ship3"            
#> [46] "ShootLat"          "ShootLong"         "SpeedWater"       
#> [49] "StatRec"           "StdSpecRecCode"    "StNo"             
#> [52] "sub_div"           "SurCurDir"         "SurCurSpeed"      
#> [55] "SurSal"            "SurTemp"           "Survey"           
#> [58] "SweepLngt"         "SwellDir"          "SwellHeight"      
#> [61] "ThClineDepth"      "ThermoCline"       "Tickler"          
#> [64] "TidePhase"         "TideSpeed"         "TimeShot"         
#> [67] "TowDir"            "Turbidity"         "WarpDen"          
#> [70] "Warpdia"           "Warplngt"          "WgtGroundRope"    
#> [73] "WindDir"           "WindSpeed"         "WingSpread"       
#> [76] "X"                 "Year"

# The variables that go into the stomach haul ID should not contain NAs:
# is.na() gives one TRUE/FALSE per cell, unique() collapses identical rows
bits_hh %>%
  dplyr::select(Year, Quarter, Month, Country, Rect, HaulNo) %>%
  is.na() %>%
  unique()
#>       Year Quarter Month Country  Rect HaulNo
#> [1,] FALSE   FALSE FALSE   FALSE FALSE  FALSE

# Before making the id_haul_stomach variable we need to change the country column so that it actually matches the stomach data
# This is the stomach version:
#[1] "LV" "PL" "SE" "DK"
bits_hh$Country %>% unique()
#> [1] "LAT" "POL" "DEN" "GFR" "SWE" "RUS" "EST" "LTU"

# MAKE SURE THE COUNTRY CODE IS THE SAME! FOR NOW I DON'T USE COUNTRY2
# Country2 holds the two-letter code for the four countries in the stomach
# data and is NA for every other country.
stomach_country_codes <- c(LAT = "LV", POL = "PL", SWE = "SE", DEN = "DK")
bits_hh <- bits_hh %>%
  mutate(Country2 = unname(stomach_country_codes[Country]))

# Haul-level columns to carry over to the HL and CA data, plus a haul ID for
# matching against the stomach data (built with Country, not Country2)
bits_hh_merge <- bits_hh %>%
  mutate(id_haul_stomach = paste(Year, Quarter, Month, Country, Rect, HaulNo, sep = ".")) %>%
  dplyr::select(
    sub_div, Rect, HaulVal, StdSpecRecCode, BySpecRecCode, Fishing.line, Month,
    DataType, HaulDur, GroundSpeed, haul.id, IDx, ShootLat, ShootLong, id_haul_stomach
  )

# Attach the haul information by IDx. The HL data already has a haul.id column,
# which is dropped first so that the one from the HH data is used.
bits_hl <- bits_hl %>%
  dplyr::select(-haul.id) %>%
  left_join(bits_hh_merge, by = "IDx")
bits_ca <- bits_ca %>%
  left_join(bits_hh_merge, by = "IDx")

# Keep only the subdivisions of interest (24-28) in all three data sets
kept_sub_divs <- c(24, 25, 26, 27, 28)
bits_hh <- filter(bits_hh, sub_div %in% kept_sub_divs)
bits_hl <- filter(bits_hl, sub_div %in% kept_sub_divs)
bits_ca <- filter(bits_ca, sub_div %in% kept_sub_divs)

# Spot check: haul numbers for one year / quarter / month / country / rectangle
bits_hl %>%
  filter(Year == 2016 & Quarter == 1 & Month == 2 & Country == "SWE" & Rect == "39G4") %>%
  distinct(HaulNo)
#> filter: removed 820,236 rows (>99%), 144 rows remaining
#> distinct: removed 143 rows (99%), one row remaining
#>   HaulNo
#> 1      1

Filter species

# Bar plot of the number of HL rows per species code, for codes with more than
# 2000 rows. Known codes are shown with a common name, all others with the
# code itself as label.
bits_hl %>%
  # filter(SpecCodeType == "W") %>%
  group_by(haul.id, SpecCode) %>%
  summarise(n = n()) %>%
  ungroup() %>%
  group_by(SpecCode) %>%
  summarise(n = sum(n)) %>%
  filter(n > 2000) %>%
  mutate(species = case_when(
    SpecCode %in% c("126417", "161722") ~ "herring",
    SpecCode %in% c("126425", "161789") ~ "sprat",
    SpecCode %in% c("126438", "164758") ~ "whiting",
    SpecCode %in% c("126450", "164748") ~ "fourbeard_rockling",
    SpecCode %in% c("126736") ~ "smelt",
    SpecCode %in% c("127123") ~ "eelpout",
    SpecCode %in% c("127139", "172881") ~ "dab",
    SpecCode %in% c("127143", "172902") ~ "plaice",
    SpecCode %in% c("127141", "172894") ~ "flounder",
    SpecCode %in% c("127149", "616195") ~ "turbot",
    SpecCode %in% c("127203") ~ "shorthorn_sculpin",
    SpecCode %in% c("126436", "164712") ~ "cod",
    TRUE ~ as.character(SpecCode)
  )) %>%
  ggplot() +
  geom_col(aes(reorder(species, desc(n)), n)) +
  theme(axis.text.x = element_text(angle = 90, size = 11)) +
  labs(x = "")
#> group_by: 2 grouping variables (haul.id, SpecCode)
#> summarise: now 67,133 rows and 3 columns, one group variable remaining (haul.id)
#> ungroup: no grouping variables
#> group_by: one grouping variable (SpecCode)
#> summarise: now 186 rows and 2 columns, ungrouped
#> filter: removed 165 rows (89%), 21 rows remaining
#> mutate: new variable 'species' (character) with 12 unique values and 0% NA


# Based on this plot, and species we have metabolic index parameters for, we will go ahead with the following species:
# Cod, flounder, plaice, dab


# Plot some species in space!
# Map of total numbers per haul for the four focal species, by year.
#
# TotalNo is repeated on every length-class row of a species / sex / category
# within a haul, so summing it over all HL rows counts the same total once per
# length class. Each distinct total is therefore kept only once before summing.
# TotalNo == -9 is dropped first (it occurs on rows without length data and
# looks like the missing-value code), so that it does not pull the sum down.
# NOTE(review): this assumes one TotalNo per haul, species code, sex and
# CatIdentifier, and that TotalNo is comparable across DataType -- confirm
# against the DATRAS field definitions.
bits_hl %>%
  mutate(species = case_when(
    SpecCode %in% c("126436", "164712") ~ "cod",
    SpecCode %in% c("127141", "172894") ~ "flounder",
    SpecCode %in% c("127143", "172902") ~ "plaice",
    SpecCode %in% c("127139", "172881") ~ "dab",
    TRUE ~ NA_character_
  )) %>%
  drop_na(species) %>%
  filter(TotalNo >= 0) %>%
  distinct(Year, haul.id, ShootLong, ShootLat, species,
           SpecCode, Sex, CatIdentifier, TotalNo) %>%
  group_by(Year, haul.id, ShootLong, ShootLat, species) %>%
  summarise(tot_no = sum(TotalNo)) %>%
  ungroup() %>%
  filter(tot_no > 0) %>%
  filter(ShootLong > 13.5) %>%
  ggplot(aes(ShootLong, ShootLat, color = log(tot_no))) +
  geom_point() +
  facet_grid(species ~ Year)
#> mutate: new variable 'species' (character) with 5 unique values and 45% NA
#> drop_na: removed 365,289 rows (45%), 455,091 rows remaining
#> group_by: 5 grouping variables (Year, haul.id, ShootLong, ShootLat, species)
#> summarise: now 28,108 rows and 6 columns, 4 group variables remaining (Year, haul.id, ShootLong, ShootLat)
#> filter (grouped): removed 6 rows (<1%), 28,102 rows remaining
#> filter (grouped): removed 3,468 rows (12%), 24,634 rows remaining

# Extract the HL rows of one species (matched on either of its two species
# codes) and label them with the Latin name in a new Species column.
subset_species <- function(species_codes, latin_name) {
  bits_hl %>%
    filter(SpecCode %in% species_codes) %>%
    mutate(Species = latin_name)
}

hlcod <- subset_species(c("126436", "164712"), "Gadus morhua")
hlfle <- subset_species(c("127141", "172894"), "Platichthys flesus")
hlpla <- subset_species(c("127143", "172902"), "Pleuronectes platessa")
hldab <- subset_species(c("127139", "172881"), "Limanda limanda")

Prepare to add 0 catches

# Columns shared by the HL data (here already subset by species) and the HH
# data; these are the join keys used when adding zero-catch hauls
comcol <- names(hlcod)[names(hlcod) %in% names(bits_hh)]

# What is the proportion of zero-catch hauls?
# Here we don't have zero catches: hauls without cod have no rows in hlcod
hlcod %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(HLNoAtLngt)) %>%
  ungroup() %>%
  mutate(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N")) %>%
  distinct(zero_catch)
#> # A tibble: 1 × 1
#>   zero_catch
#>   <chr>     
#> 1 N

# Cod: add the hauls without cod by full-joining with the HH data on the shared
# columns. The added hauls have NA in all HL-only columns (made 0 later); lines
# with SpecVal = 0 are removed afterwards.
hh_common <- bits_hh[comcol]
hlcod0 <- hlcod %>% full_join(hh_common, by = comcol)

# No zeroes yet: the per-haul sum is NA for the hauls that were just added
hlcod0 %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(HLNoAtLngt)) %>%
  ungroup() %>%
  mutate(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N")) %>%
  distinct(zero_catch)
#> # A tibble: 1 × 1
#>   zero_catch
#>   <lgl>     
#> 1 NA

# Rows without a SpecVal are the hauls added from the HH data: label them as
# zero catches and store SpecVal as a factor
no_specval <- is.na(hlcod0$SpecVal)
hlcod0$SpecVal <- factor(replace(hlcod0$SpecVal, no_specval, "zeroCatch"))

# Drop the lines with SpecVal = 0
hlcod0 <- hlcod0 %>% filter(!SpecVal == "0")

# Add species again after merge (the added hauls have no Species)
hlcod0[["Species"]] <- "Gadus morhua"

# Flounder, plaice and dab: add 0s, remove them if StdSpecRecCode != 1 and then
# remove lines with SpecVal = 0.
#
# hl_species:   HL rows of one species (with a Species column)
# hauls:        HH data
# join_cols:    names of the columns shared by hl_species and hauls
# species_name: Latin name written to Species for all rows of the result
#
# Returns hl_species plus one row per haul without a catch of the species
# (SpecVal = "zeroCatch"), restricted to hauls with StdSpecRecCode equal to 1.
add_zero_hauls <- function(hl_species, hauls, join_cols, species_name) {
  out <- full_join(hl_species, hauls[, join_cols], by = join_cols)

  # Hauls without a record of the species have no Species after the join
  no_catch <- is.na(out$Species)

  # %in% never returns NA. With `StdSpecRecCode != 1` a missing StdSpecRecCode
  # gave an NA row index, which base `[` turns into an all-NA row instead of
  # dropping the haul. A haul with a missing code is now simply not kept as a
  # zero catch.
  std_recorded <- out$StdSpecRecCode %in% 1
  out <- out[!no_catch | std_recorded, ]

  out$SpecVal[is.na(out$SpecVal)] <- "zeroCatch"
  out$SpecVal <- factor(out$SpecVal)

  out <- out %>% filter(!SpecVal == "0")

  # Add species again after the merge
  out$Species <- species_name
  out
}

hlfle0 <- add_zero_hauls(hlfle, bits_hh, comcol, "Platichthys flesus")
hlpla0 <- add_zero_hauls(hlpla, bits_hh, comcol, "Pleuronectes platessa")
hldab0 <- add_zero_hauls(hldab, bits_hh, comcol, "Limanda limanda")

# Number of distinct haul ids per species after adding the zero catches
length(unique(hlcod0$haul.id))
#> [1] 12254
length(unique(hlfle0$haul.id))
#> [1] 12016
length(unique(hlpla0$haul.id))
#> [1] 11547
length(unique(hldab0$haul.id))
#> [1] 11477

Create (unstandardized) CPUE for SpecVal=1. If DataType=C then CPUEun=HLNoAtLngt, if DataType=R then CPUEun=HLNoAtLngt/(HaulDur/60), if DataType=S then CPUEun=(HLNoAtLngt*SubFactor)/(HaulDur/60). If SpecVal="zeroCatch" then CPUEun=0, if SpecVal=4 we need to decide (no length measurements, only total catch). Note that here we also add zero CPUE if SpecVal=="zeroCatch".

Then I will sum for the same haul the CPUE of the same length classes if they were sampled with different subfactors or with different sexes.

# Cod
# Unstandardized CPUE (CPUEun) for every row, evaluated in this order:
#   SpecVal "1" and DataType "C": HLNoAtLngt is used as it is
#   SpecVal "1" and DataType "R": HLNoAtLngt divided by HaulDur/60
#   SpecVal "1" and DataType "S": HLNoAtLngt * SubFactor divided by HaulDur/60
#   SpecVal "zeroCatch":          0
#   anything else (e.g. SpecVal 4): NA
# HaulDur/60 presumably converts minutes to hours -- confirm the unit of HaulDur.
# ifelse() returns NA where its test is NA, so a SpecVal "1" row with a missing
# DataType also gets NA; the order of the nested tests therefore matters.
hlcod0 <- hlcod0 %>%
  mutate(CPUEun = ifelse(SpecVal == "1" & DataType == "C",
                         HLNoAtLngt,
                         
                         ifelse(SpecVal == "1" & DataType == "R",
                                HLNoAtLngt/(HaulDur/60),
                                
                                ifelse(SpecVal == "1" & DataType == "S",
                                       (HLNoAtLngt*SubFactor)/(HaulDur/60),
                                       
                                       ifelse(SpecVal == "zeroCatch", 0, NA)))))

# Total unstandardized CPUE per haul; list the hauls where it is not zero
# (hauls whose sum is NA are dropped by the filter as well)
hlcod0 %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(CPUEun)) %>%
  ungroup() %>%
  filter(CPUEun_haul != 0)
#> # A tibble: 10,268 × 3
#>    haul.id                     Year CPUEun_haul
#>    <chr>                      <int>       <dbl>
#>  1 1993:1:DEN:DAN2:GRT:105:45  1993          37
#>  2 1993:1:DEN:DAN2:GRT:106:46  1993           2
#>  3 1993:1:DEN:DAN2:GRT:107:47  1993          34
#>  4 1993:1:DEN:DAN2:GRT:108:48  1993           5
#>  5 1993:1:DEN:DAN2:GRT:109:49  1993          24
#>  6 1993:1:DEN:DAN2:GRT:11:6    1993         135
#>  7 1993:1:DEN:DAN2:GRT:110:50  1993          40
#>  8 1993:1:DEN:DAN2:GRT:112:51  1993          32
#>  9 1993:1:DEN:DAN2:GRT:113:52  1993          12
#> 10 1993:1:DEN:DAN2:GRT:114:53  1993          16
#> # … with 10,258 more rows

# Number of hauls per year, filled by whether the haul is a zero catch
# ("Y"), not ("N"), or has an NA sum
hlcod0 %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(CPUEun)) %>%
  ungroup() %>%
  mutate(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N")) %>%
  group_by(Year, zero_catch) %>%
  summarise(n = n()) %>%
  ggplot(aes(x = Year, y = n, fill = zero_catch)) +
  geom_col()


# Some combinations of length class and haul id have more than one row (I
# suppose often because the catch is split by sex), so we need to sum them up
hlcod0 %>%
  group_by(LngtClass, haul.id) %>%
  mutate(n = n()) %>%
  ungroup() %>%
  distinct(n)
#> # A tibble: 2 × 1
#>       n
#>   <int>
#> 1     1
#> 2     2
# Look at the first rows that share a length class and haul id
hlcod0 %>%
  group_by(LngtClass, haul.id) %>%
  mutate(n = n()) %>%
  ungroup() %>%
  filter(n == 2) %>%
  as.data.frame() %>%
  head(20)
#>         X RecordType Survey Quarter Country Ship Gear SweepLngt GearEx DoorType
#> 1  205495         HL   BITS       1     RUS RUJB  HAK        NA   <NA>       NA
#> 2  300422         HL   BITS       4     EST ESLF  TVS        NA   <NA>       NA
#> 3  300423         HL   BITS       4     EST ESLF  TVS        NA   <NA>       NA
#> 4  325369         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 5  325375         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 6  326062         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 7  326064         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 8  326065         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 9  326066         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 10 326067         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 11 326068         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 12 326069         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 13 326071         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 14 326073         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 15 326077         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 16 326079         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 17 326080         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 18 326082         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 19 326083         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#> 20 326085         HL   BITS       4     DEN 26D4  TVL        NA      R       NA
#>    StNo HaulNo Year SpecCodeType SpecCode SpecVal  Sex TotalNo CatIdentifier
#> 1  <NA>     10 1998            T   164712       4 <NA>      -9             1
#> 2    14     14 2000            T   164712       1    M       2             1
#> 3    14     14 2000            T   164712       1    F       4             1
#> 4    63     31 2001            T   164712       1    M       5             1
#> 5    63     31 2001            T   164712       1    F       7             1
#> 6    83     37 2001            T   164712       1    M      21             1
#> 7    83     37 2001            T   164712       1    M      21             1
#> 8    83     37 2001            T   164712       1    M      21             1
#> 9    83     37 2001            T   164712       1    M      21             1
#> 10   83     37 2001            T   164712       1    M      21             1
#> 11   83     37 2001            T   164712       1    M      21             1
#> 12   83     37 2001            T   164712       1    M      21             1
#> 13   83     37 2001            T   164712       1    M      21             1
#> 14   83     37 2001            T   164712       1    M      21             1
#> 15   83     37 2001            T   164712       1    F      42             1
#> 16   83     37 2001            T   164712       1    F      42             1
#> 17   83     37 2001            T   164712       1    F      42             1
#> 18   83     37 2001            T   164712       1    F      42             1
#> 19   83     37 2001            T   164712       1    F      42             1
#> 20   83     37 2001            T   164712       1    F      42             1
#>    NoMeas SubFactor SubWgt CatCatchWgt LngtCode LngtClass HLNoAtLngt DevStage
#> 1      NA         1     NA          NA     <NA>        NA         -9     <NA>
#> 2       3         1     NA          32        1        35          2     <NA>
#> 3       3         1     NA          32        1        35          2     <NA>
#> 4       5         1     NA        3142        1        22          2     <NA>
#> 5       7         1     NA        3142        1        22          1     <NA>
#> 6      21         1     NA       64261        1        22          1     <NA>
#> 7      21         1     NA       64261        1        38          2     <NA>
#> 8      21         1     NA       64261        1        39          2     <NA>
#> 9      21         1     NA       64261        1        41          1     <NA>
#> 10     21         1     NA       64261        1        42          1     <NA>
#> 11     21         1     NA       64261        1        44          2     <NA>
#> 12     21         1     NA       64261        1        45          4     <NA>
#> 13     21         1     NA       64261        1        48          2     <NA>
#> 14     21         1     NA       64261        1        52          1     <NA>
#> 15     42         1     NA       64261        1        22          1     <NA>
#> 16     42         1     NA       64261        1        38          2     <NA>
#> 17     42         1     NA       64261        1        39          2     <NA>
#> 18     42         1     NA       64261        1        41          1     <NA>
#> 19     42         1     NA       64261        1        42          4     <NA>
#> 20     42         1     NA       64261        1        44          3     <NA>
#>    LenMeasType DateofCalculation Valid_Aphia Ship2 Ship3
#> 1           NA          20140617      126436  ATLD  ATLD
#> 2           NA          20131112      126436  KOOT  KOOT
#> 3           NA          20131112      126436  KOOT  KOOT
#> 4           NA          20131113      126436  DAN2  DAN2
#> 5           NA          20131113      126436  DAN2  DAN2
#> 6           NA          20131113      126436  DAN2  DAN2
#> 7           NA          20131113      126436  DAN2  DAN2
#> 8           NA          20131113      126436  DAN2  DAN2
#> 9           NA          20131113      126436  DAN2  DAN2
#> 10          NA          20131113      126436  DAN2  DAN2
#> 11          NA          20131113      126436  DAN2  DAN2
#> 12          NA          20131113      126436  DAN2  DAN2
#> 13          NA          20131113      126436  DAN2  DAN2
#> 14          NA          20131113      126436  DAN2  DAN2
#> 15          NA          20131113      126436  DAN2  DAN2
#> 16          NA          20131113      126436  DAN2  DAN2
#> 17          NA          20131113      126436  DAN2  DAN2
#> 18          NA          20131113      126436  DAN2  DAN2
#> 19          NA          20131113      126436  DAN2  DAN2
#> 20          NA          20131113      126436  DAN2  DAN2
#>                          IDx sub_div Rect HaulVal StdSpecRecCode BySpecRecCode
#> 1  1998.1.RUS.RUJB.HAK.NA.10      26 38G9       V              1             1
#> 2  2000.4.EST.ESLF.TVS.14.14      28 45H1       V              1             1
#> 3  2000.4.EST.ESLF.TVS.14.14      28 45H1       V              1             1
#> 4  2001.4.DEN.26D4.TVL.63.31      26 39G8       V              1             1
#> 5  2001.4.DEN.26D4.TVL.63.31      26 39G8       V              1             1
#> 6  2001.4.DEN.26D4.TVL.83.37      26 40G8       V              1             1
#> 7  2001.4.DEN.26D4.TVL.83.37      26 40G8       V              1             1
#> 8  2001.4.DEN.26D4.TVL.83.37      26 40G8       V              1             1
#> 9  2001.4.DEN.26D4.TVL.83.37      26 40G8       V              1             1
#> 10 2001.4.DEN.26D4.TVL.83.37      26 40G8       V              1             1
#> 11 2001.4.DEN.26D4.TVL.83.37      26 40G8       V              1             1
#> 12 2001.4.DEN.26D4.TVL.83.37      26 40G8       V              1             1
#> 13 2001.4.DEN.26D4.TVL.83.37      26 40G8       V              1             1
#> 14 2001.4.DEN.26D4.TVL.83.37      26 40G8       V              1             1
#> 15 2001.4.DEN.26D4.TVL.83.37      26 40G8       V              1             1
#> 16 2001.4.DEN.26D4.TVL.83.37      26 40G8       V              1             1
#> 17 2001.4.DEN.26D4.TVL.83.37      26 40G8       V              1             1
#> 18 2001.4.DEN.26D4.TVL.83.37      26 40G8       V              1             1
#> 19 2001.4.DEN.26D4.TVL.83.37      26 40G8       V              1             1
#> 20 2001.4.DEN.26D4.TVL.83.37      26 40G8       V              1             1
#>    Fishing.line Month DataType HaulDur GroundSpeed                   haul.id
#> 1         -9.00     3        C      30         3.8 1998:1:RUS:ATLD:HAK:NA:10
#> 2         33.22    11        C      30         3.0 2000:4:EST:KOOT:TVS:14:14
#> 3         33.22    11        C      30         3.0 2000:4:EST:KOOT:TVS:14:14
#> 4         63.46    11        R      30         3.0 2001:4:DEN:DAN2:TVL:63:31
#> 5         63.46    11        R      30         3.0 2001:4:DEN:DAN2:TVL:63:31
#> 6         63.46    11        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37
#> 7         63.46    11        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37
#> 8         63.46    11        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37
#> 9         63.46    11        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37
#> 10        63.46    11        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37
#> 11        63.46    11        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37
#> 12        63.46    11        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37
#> 13        63.46    11        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37
#> 14        63.46    11        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37
#> 15        63.46    11        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37
#> 16        63.46    11        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37
#> 17        63.46    11        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37
#> 18        63.46    11        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37
#> 19        63.46    11        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37
#> 20        63.46    11        R      31         3.1 2001:4:DEN:DAN2:TVL:83:37
#>    ShootLat ShootLong       id_haul_stomach      Species   CPUEun n
#> 1   54.6333   19.6500  1998.1.3.RUS.38G9.10 Gadus morhua       NA 2
#> 2   58.0167   21.0833 2000.4.11.EST.45H1.14 Gadus morhua 2.000000 2
#> 3   58.0167   21.0833 2000.4.11.EST.45H1.14 Gadus morhua 2.000000 2
#> 4   55.4699   18.3116 2001.4.11.DEN.39G8.31 Gadus morhua 4.000000 2
#> 5   55.4699   18.3116 2001.4.11.DEN.39G8.31 Gadus morhua 2.000000 2
#> 6   55.6627   18.0414 2001.4.11.DEN.40G8.37 Gadus morhua 1.935484 2
#> 7   55.6627   18.0414 2001.4.11.DEN.40G8.37 Gadus morhua 3.870968 2
#> 8   55.6627   18.0414 2001.4.11.DEN.40G8.37 Gadus morhua 3.870968 2
#> 9   55.6627   18.0414 2001.4.11.DEN.40G8.37 Gadus morhua 1.935484 2
#> 10  55.6627   18.0414 2001.4.11.DEN.40G8.37 Gadus morhua 1.935484 2
#> 11  55.6627   18.0414 2001.4.11.DEN.40G8.37 Gadus morhua 3.870968 2
#> 12  55.6627   18.0414 2001.4.11.DEN.40G8.37 Gadus morhua 7.741935 2
#> 13  55.6627   18.0414 2001.4.11.DEN.40G8.37 Gadus morhua 3.870968 2
#> 14  55.6627   18.0414 2001.4.11.DEN.40G8.37 Gadus morhua 1.935484 2
#> 15  55.6627   18.0414 2001.4.11.DEN.40G8.37 Gadus morhua 1.935484 2
#> 16  55.6627   18.0414 2001.4.11.DEN.40G8.37 Gadus morhua 3.870968 2
#> 17  55.6627   18.0414 2001.4.11.DEN.40G8.37 Gadus morhua 3.870968 2
#> 18  55.6627   18.0414 2001.4.11.DEN.40G8.37 Gadus morhua 1.935484 2
#> 19  55.6627   18.0414 2001.4.11.DEN.40G8.37 Gadus morhua 7.741935 2
#> 20  55.6627   18.0414 2001.4.11.DEN.40G8.37 Gadus morhua 5.806452 2
# Rows whose length class x haul combination occurs exactly twice; the haul id
# of the second such row is used as a worked example below
test <- hlcod0 %>%
  group_by(LngtClass, haul.id) %>%
  mutate(n = n()) %>%
  ungroup() %>%
  filter(n == 2)
test_id <- test[["haul.id"]][2]

# Collapse to one row per haul and length class. CPUEun becomes the sum over
# the combination; every other column keeps the value of the first row.
hlcodL <- hlcod0 %>%
  group_by(LngtClass, haul.id) %>%
  mutate(CPUEun = sum(CPUEun)) %>%
  ungroup() %>%
  distinct(haul.id, LngtClass, .keep_all = TRUE) %>%
  dplyr::select(-X) # Drop the row-index column

# Check with an ID: rows of the example haul before summing
hlcod0 %>% filter(haul.id == test_id)
#>        X RecordType Survey Quarter Country Ship Gear SweepLngt GearEx DoorType
#> 1 300422         HL   BITS       4     EST ESLF  TVS        NA   <NA>       NA
#> 2 300423         HL   BITS       4     EST ESLF  TVS        NA   <NA>       NA
#> 3 300424         HL   BITS       4     EST ESLF  TVS        NA   <NA>       NA
#> 4   4998         HH   BITS       4     EST ESLF  TVS        NA   <NA>       NA
#>   StNo HaulNo Year SpecCodeType SpecCode   SpecVal  Sex TotalNo CatIdentifier
#> 1   14     14 2000            T   164712         1    M       2             1
#> 2   14     14 2000            T   164712         1    F       4             1
#> 3   14     14 2000            T   164712         1    F       4             1
#> 4   14     14 2000         <NA>       NA zeroCatch <NA>      NA            NA
#>   NoMeas SubFactor SubWgt CatCatchWgt LngtCode LngtClass HLNoAtLngt DevStage
#> 1      3         1     NA          32        1        35          2     <NA>
#> 2      3         1     NA          32        1        35          2     <NA>
#> 3      3         1     NA          32        1        39          2     <NA>
#> 4     NA        NA     NA          NA     <NA>        NA         NA     <NA>
#>   LenMeasType DateofCalculation Valid_Aphia Ship2 Ship3
#> 1          NA          20131112      126436  KOOT  KOOT
#> 2          NA          20131112      126436  KOOT  KOOT
#> 3          NA          20131112      126436  KOOT  KOOT
#> 4          NA          20220301          NA  KOOT  KOOT
#>                         IDx sub_div Rect HaulVal StdSpecRecCode BySpecRecCode
#> 1 2000.4.EST.ESLF.TVS.14.14      28 45H1       V              1             1
#> 2 2000.4.EST.ESLF.TVS.14.14      28 45H1       V              1             1
#> 3 2000.4.EST.ESLF.TVS.14.14      28 45H1       V              1             1
#> 4 2000.4.EST.ESLF.TVS.14.14      28 45H1       V              1             1
#>   Fishing.line Month DataType HaulDur GroundSpeed                   haul.id
#> 1        33.22    11        C      30           3 2000:4:EST:KOOT:TVS:14:14
#> 2        33.22    11        C      30           3 2000:4:EST:KOOT:TVS:14:14
#> 3        33.22    11        C      30           3 2000:4:EST:KOOT:TVS:14:14
#> 4        33.22    11        C      30           3 2000:4:EST:KOOT:TVS:14:14
#>   ShootLat ShootLong       id_haul_stomach      Species CPUEun
#> 1  58.0167   21.0833 2000.4.11.EST.45H1.14 Gadus morhua      2
#> 2  58.0167   21.0833 2000.4.11.EST.45H1.14 Gadus morhua      2
#> 3  58.0167   21.0833 2000.4.11.EST.45H1.14 Gadus morhua      2
#> 4  58.0167   21.0833                  <NA> Gadus morhua      0
# Same haul after summing per length class
hlcodL %>%
  filter(haul.id == test_id) %>%
  as.data.frame()
#>   RecordType Survey Quarter Country Ship Gear SweepLngt GearEx DoorType StNo
#> 1         HL   BITS       4     EST ESLF  TVS        NA   <NA>       NA   14
#> 2         HL   BITS       4     EST ESLF  TVS        NA   <NA>       NA   14
#> 3         HH   BITS       4     EST ESLF  TVS        NA   <NA>       NA   14
#>   HaulNo Year SpecCodeType SpecCode   SpecVal  Sex TotalNo CatIdentifier NoMeas
#> 1     14 2000            T   164712         1    M       2             1      3
#> 2     14 2000            T   164712         1    F       4             1      3
#> 3     14 2000         <NA>       NA zeroCatch <NA>      NA            NA     NA
#>   SubFactor SubWgt CatCatchWgt LngtCode LngtClass HLNoAtLngt DevStage
#> 1         1     NA          32        1        35          2     <NA>
#> 2         1     NA          32        1        39          2     <NA>
#> 3        NA     NA          NA     <NA>        NA         NA     <NA>
#>   LenMeasType DateofCalculation Valid_Aphia Ship2 Ship3
#> 1          NA          20131112      126436  KOOT  KOOT
#> 2          NA          20131112      126436  KOOT  KOOT
#> 3          NA          20220301          NA  KOOT  KOOT
#>                         IDx sub_div Rect HaulVal StdSpecRecCode BySpecRecCode
#> 1 2000.4.EST.ESLF.TVS.14.14      28 45H1       V              1             1
#> 2 2000.4.EST.ESLF.TVS.14.14      28 45H1       V              1             1
#> 3 2000.4.EST.ESLF.TVS.14.14      28 45H1       V              1             1
#>   Fishing.line Month DataType HaulDur GroundSpeed                   haul.id
#> 1        33.22    11        C      30           3 2000:4:EST:KOOT:TVS:14:14
#> 2        33.22    11        C      30           3 2000:4:EST:KOOT:TVS:14:14
#> 3        33.22    11        C      30           3 2000:4:EST:KOOT:TVS:14:14
#>   ShootLat ShootLong       id_haul_stomach      Species CPUEun
#> 1  58.0167   21.0833 2000.4.11.EST.45H1.14 Gadus morhua      4
#> 2  58.0167   21.0833 2000.4.11.EST.45H1.14 Gadus morhua      2
#> 3  58.0167   21.0833                  <NA> Gadus morhua      0

# Do we still have 0 catches? List the hauls whose summed CPUE is non-zero
hlcodL %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(CPUEun), .groups = "drop") %>%
  filter(CPUEun_haul != 0)
#> # A tibble: 10,268 × 3
#>    haul.id                     Year CPUEun_haul
#>    <chr>                      <int>       <dbl>
#>  1 1993:1:DEN:DAN2:GRT:105:45  1993          37
#>  2 1993:1:DEN:DAN2:GRT:106:46  1993           2
#>  3 1993:1:DEN:DAN2:GRT:107:47  1993          34
#>  4 1993:1:DEN:DAN2:GRT:108:48  1993           5
#>  5 1993:1:DEN:DAN2:GRT:109:49  1993          24
#>  6 1993:1:DEN:DAN2:GRT:11:6    1993         135
#>  7 1993:1:DEN:DAN2:GRT:110:50  1993          40
#>  8 1993:1:DEN:DAN2:GRT:112:51  1993          32
#>  9 1993:1:DEN:DAN2:GRT:113:52  1993          12
#> 10 1993:1:DEN:DAN2:GRT:114:53  1993          16
#> # … with 10,258 more rows

# Number of hauls per year, filled by whether the haul's summed CPUE is zero
hlcodL %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(CPUEun), .groups = "drop") %>%
  mutate(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N")) %>%
  count(Year, zero_catch) %>%
  ggplot(aes(x = Year, y = n, fill = zero_catch)) +
  geom_col()



# Flounder
# CPUEun per row, depending on SpecVal and DataType:
#   SpecVal "1", DataType "C": HLNoAtLngt as is
#   SpecVal "1", DataType "R": HLNoAtLngt divided by HaulDur / 60
#   SpecVal "1", DataType "S": HLNoAtLngt * SubFactor divided by HaulDur / 60
#   SpecVal "zeroCatch":       0
#   anything else:             NA
hlfle0 <- hlfle0 %>%
  mutate(
    CPUEun = case_when(
      SpecVal == "1" & DataType == "C" ~ as.numeric(HLNoAtLngt),
      SpecVal == "1" & DataType == "R" ~ HLNoAtLngt / (HaulDur / 60),
      SpecVal == "1" & DataType == "S" ~
        (HLNoAtLngt * SubFactor) / (HaulDur / 60),
      SpecVal == "zeroCatch" ~ 0
    )
  )

# List the hauls whose summed flounder CPUE is non-zero
hlfle0 %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(CPUEun), .groups = "drop") %>%
  filter(CPUEun_haul != 0)
#> # A tibble: 10,013 × 3
#>    haul.id                     Year CPUEun_haul
#>    <chr>                      <int>       <dbl>
#>  1 1993:1:DEN:DAN2:GRT:105:45  1993        2500
#>  2 1993:1:DEN:DAN2:GRT:106:46  1993           8
#>  3 1993:1:DEN:DAN2:GRT:107:47  1993          26
#>  4 1993:1:DEN:DAN2:GRT:108:48  1993          34
#>  5 1993:1:DEN:DAN2:GRT:109:49  1993           8
#>  6 1993:1:DEN:DAN2:GRT:11:6    1993          11
#>  7 1993:1:DEN:DAN2:GRT:110:50  1993           8
#>  8 1993:1:DEN:DAN2:GRT:114:53  1993           1
#>  9 1993:1:DEN:DAN2:GRT:115:54  1993           3
#> 10 1993:1:DEN:DAN2:GRT:117:56  1993         729
#> # … with 10,003 more rows

# Number of hauls per year, filled by whether the haul's summed CPUE is zero
hlfle0 %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(CPUEun), .groups = "drop") %>%
  mutate(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N")) %>%
  count(Year, zero_catch) %>%
  ggplot(aes(x = Year, y = n, fill = zero_catch)) +
  geom_col()


# Some combinations of length class and haul id span several rows (presumably
# often because the catch is split by sex), so CPUE must be summed within each
# combination. Which row counts per combination occur?
hlfle0 %>%
  group_by(LngtClass, haul.id) %>%
  mutate(n = n()) %>%
  ungroup() %>%
  distinct(n)
#> # A tibble: 3 × 1
#>       n
#>   <int>
#> 1     1
#> 2     2
#> 3     3
# Preview the first 20 rows belonging to combinations that occur exactly twice
hlfle0 %>%
  group_by(LngtClass, haul.id) %>%
  mutate(n = n()) %>%
  ungroup() %>%
  filter(n == 2) %>%
  as.data.frame() %>%
  head(20)
#>        X RecordType Survey Quarter Country Ship Gear SweepLngt GearEx DoorType
#> 1  98168         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 2  98169         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 3  98374         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 4  98375         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 5  98378         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 6  98379         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 7  98380         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 8  98383         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 9  98582         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 10 98584         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 11 98585         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 12 98586         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 13 98587         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 14 98588         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 15 98589         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 16 98590         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 17 98591         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 18 98592         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 19 98593         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 20 98594         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#>    StNo HaulNo Year SpecCodeType SpecCode SpecVal Sex TotalNo CatIdentifier
#> 1    25      3 1995            W   127141       1   M       8             1
#> 2    25      3 1995            W   127141       1   F       6             1
#> 3    27      6 1995            W   127141       1   M      18             1
#> 4    27      6 1995            W   127141       1   M      18             1
#> 5    27      6 1995            W   127141       1   M      18             1
#> 6    27      6 1995            W   127141       1   F      24             1
#> 7    27      6 1995            W   127141       1   F      24             1
#> 8    27      6 1995            W   127141       1   F      24             1
#> 9    32     31 1995            W   127141       1   M      38             1
#> 10   32     31 1995            W   127141       1   M      38             1
#> 11   32     31 1995            W   127141       1   M      38             1
#> 12   32     31 1995            W   127141       1   M      38             1
#> 13   32     31 1995            W   127141       1   M      38             1
#> 14   32     31 1995            W   127141       1   M      38             1
#> 15   32     31 1995            W   127141       1   M      38             1
#> 16   32     31 1995            W   127141       1   F      36             1
#> 17   32     31 1995            W   127141       1   F      36             1
#> 18   32     31 1995            W   127141       1   F      36             1
#> 19   32     31 1995            W   127141       1   F      36             1
#> 20   32     31 1995            W   127141       1   F      36             1
#>    NoMeas SubFactor SubWgt CatCatchWgt LngtCode LngtClass HLNoAtLngt DevStage
#> 1       7         1     NA          36        1        28          2     <NA>
#> 2       7         1     NA          36        1        28          2     <NA>
#> 3      21         1     NA          64        1        20          2     <NA>
#> 4      21         1     NA          64        1        21          6     <NA>
#> 5      21         1     NA          64        1        26          2     <NA>
#> 6      21         1     NA          64        1        20          4     <NA>
#> 7      21         1     NA          64        1        21          4     <NA>
#> 8      21         1     NA          64        1        26          2     <NA>
#> 9      37         1     NA         217        1        24          2     <NA>
#> 10     37         1     NA         217        1        26          2     <NA>
#> 11     37         1     NA         217        1        27          8     <NA>
#> 12     37         1     NA         217        1        28          4     <NA>
#> 13     37         1     NA         217        1        29          2     <NA>
#> 14     37         1     NA         217        1        31          2     <NA>
#> 15     37         1     NA         217        1        34          4     <NA>
#> 16     37         1     NA         217        1        24          2     <NA>
#> 17     37         1     NA         217        1        26          4     <NA>
#> 18     37         1     NA         217        1        27          4     <NA>
#> 19     37         1     NA         217        1        28          6     <NA>
#> 20     37         1     NA         217        1        29          6     <NA>
#>    LenMeasType DateofCalculation Valid_Aphia Ship2 Ship3
#> 1           NA          20190207      127141   SOL   SOL
#> 2           NA          20190207      127141   SOL   SOL
#> 3           NA          20190207      127141   SOL   SOL
#> 4           NA          20190207      127141   SOL   SOL
#> 5           NA          20190207      127141   SOL   SOL
#> 6           NA          20190207      127141   SOL   SOL
#> 7           NA          20190207      127141   SOL   SOL
#> 8           NA          20190207      127141   SOL   SOL
#> 9           NA          20190207      127141   SOL   SOL
#> 10          NA          20190207      127141   SOL   SOL
#> 11          NA          20190207      127141   SOL   SOL
#> 12          NA          20190207      127141   SOL   SOL
#> 13          NA          20190207      127141   SOL   SOL
#> 14          NA          20190207      127141   SOL   SOL
#> 15          NA          20190207      127141   SOL   SOL
#> 16          NA          20190207      127141   SOL   SOL
#> 17          NA          20190207      127141   SOL   SOL
#> 18          NA          20190207      127141   SOL   SOL
#> 19          NA          20190207      127141   SOL   SOL
#> 20          NA          20190207      127141   SOL   SOL
#>                          IDx sub_div Rect HaulVal StdSpecRecCode BySpecRecCode
#> 1   1995.1.GFR.06S1.H20.25.3      24 38G3       V              1             1
#> 2   1995.1.GFR.06S1.H20.25.3      24 38G3       V              1             1
#> 3   1995.1.GFR.06S1.H20.27.6      24 38G3       V              1             1
#> 4   1995.1.GFR.06S1.H20.27.6      24 38G3       V              1             1
#> 5   1995.1.GFR.06S1.H20.27.6      24 38G3       V              1             1
#> 6   1995.1.GFR.06S1.H20.27.6      24 38G3       V              1             1
#> 7   1995.1.GFR.06S1.H20.27.6      24 38G3       V              1             1
#> 8   1995.1.GFR.06S1.H20.27.6      24 38G3       V              1             1
#> 9  1995.1.GFR.06S1.H20.32.31      24 38G3       V              1             1
#> 10 1995.1.GFR.06S1.H20.32.31      24 38G3       V              1             1
#> 11 1995.1.GFR.06S1.H20.32.31      24 38G3       V              1             1
#> 12 1995.1.GFR.06S1.H20.32.31      24 38G3       V              1             1
#> 13 1995.1.GFR.06S1.H20.32.31      24 38G3       V              1             1
#> 14 1995.1.GFR.06S1.H20.32.31      24 38G3       V              1             1
#> 15 1995.1.GFR.06S1.H20.32.31      24 38G3       V              1             1
#> 16 1995.1.GFR.06S1.H20.32.31      24 38G3       V              1             1
#> 17 1995.1.GFR.06S1.H20.32.31      24 38G3       V              1             1
#> 18 1995.1.GFR.06S1.H20.32.31      24 38G3       V              1             1
#> 19 1995.1.GFR.06S1.H20.32.31      24 38G3       V              1             1
#> 20 1995.1.GFR.06S1.H20.32.31      24 38G3       V              1             1
#>    Fishing.line Month DataType HaulDur GroundSpeed                  haul.id
#> 1            36     2        C      30         4.0  1995:1:GFR:SOL:H20:25:3
#> 2            36     2        C      30         4.0  1995:1:GFR:SOL:H20:25:3
#> 3            36     2        C      30         3.6  1995:1:GFR:SOL:H20:27:6
#> 4            36     2        C      30         3.6  1995:1:GFR:SOL:H20:27:6
#> 5            36     2        C      30         3.6  1995:1:GFR:SOL:H20:27:6
#> 6            36     2        C      30         3.6  1995:1:GFR:SOL:H20:27:6
#> 7            36     2        C      30         3.6  1995:1:GFR:SOL:H20:27:6
#> 8            36     2        C      30         3.6  1995:1:GFR:SOL:H20:27:6
#> 9            36     2        C      30         3.6 1995:1:GFR:SOL:H20:32:31
#> 10           36     2        C      30         3.6 1995:1:GFR:SOL:H20:32:31
#> 11           36     2        C      30         3.6 1995:1:GFR:SOL:H20:32:31
#> 12           36     2        C      30         3.6 1995:1:GFR:SOL:H20:32:31
#> 13           36     2        C      30         3.6 1995:1:GFR:SOL:H20:32:31
#> 14           36     2        C      30         3.6 1995:1:GFR:SOL:H20:32:31
#> 15           36     2        C      30         3.6 1995:1:GFR:SOL:H20:32:31
#> 16           36     2        C      30         3.6 1995:1:GFR:SOL:H20:32:31
#> 17           36     2        C      30         3.6 1995:1:GFR:SOL:H20:32:31
#> 18           36     2        C      30         3.6 1995:1:GFR:SOL:H20:32:31
#> 19           36     2        C      30         3.6 1995:1:GFR:SOL:H20:32:31
#> 20           36     2        C      30         3.6 1995:1:GFR:SOL:H20:32:31
#>    ShootLat ShootLong      id_haul_stomach            Species CPUEun n
#> 1   54.7167      13.1  1995.1.2.GFR.38G3.3 Platichthys flesus      2 2
#> 2   54.7167      13.1  1995.1.2.GFR.38G3.3 Platichthys flesus      2 2
#> 3   54.5167      13.8  1995.1.2.GFR.38G3.6 Platichthys flesus      2 2
#> 4   54.5167      13.8  1995.1.2.GFR.38G3.6 Platichthys flesus      6 2
#> 5   54.5167      13.8  1995.1.2.GFR.38G3.6 Platichthys flesus      2 2
#> 6   54.5167      13.8  1995.1.2.GFR.38G3.6 Platichthys flesus      4 2
#> 7   54.5167      13.8  1995.1.2.GFR.38G3.6 Platichthys flesus      4 2
#> 8   54.5167      13.8  1995.1.2.GFR.38G3.6 Platichthys flesus      2 2
#> 9   54.7333      13.4 1995.1.2.GFR.38G3.31 Platichthys flesus      2 2
#> 10  54.7333      13.4 1995.1.2.GFR.38G3.31 Platichthys flesus      2 2
#> 11  54.7333      13.4 1995.1.2.GFR.38G3.31 Platichthys flesus      8 2
#> 12  54.7333      13.4 1995.1.2.GFR.38G3.31 Platichthys flesus      4 2
#> 13  54.7333      13.4 1995.1.2.GFR.38G3.31 Platichthys flesus      2 2
#> 14  54.7333      13.4 1995.1.2.GFR.38G3.31 Platichthys flesus      2 2
#> 15  54.7333      13.4 1995.1.2.GFR.38G3.31 Platichthys flesus      4 2
#> 16  54.7333      13.4 1995.1.2.GFR.38G3.31 Platichthys flesus      2 2
#> 17  54.7333      13.4 1995.1.2.GFR.38G3.31 Platichthys flesus      4 2
#> 18  54.7333      13.4 1995.1.2.GFR.38G3.31 Platichthys flesus      4 2
#> 19  54.7333      13.4 1995.1.2.GFR.38G3.31 Platichthys flesus      6 2
#> 20  54.7333      13.4 1995.1.2.GFR.38G3.31 Platichthys flesus      6 2
# Rows whose length class x haul combination occurs exactly twice; the haul id
# of the second such row is used as a worked example below
test <- hlfle0 %>%
  group_by(LngtClass, haul.id) %>%
  mutate(n = n()) %>%
  ungroup() %>%
  filter(n == 2)
test_id <- test[["haul.id"]][2]

# Collapse to one row per haul and length class. CPUEun becomes the sum over
# the combination; every other column keeps the value of the first row.
hlfleL <- hlfle0 %>%
  group_by(LngtClass, haul.id) %>%
  mutate(CPUEun = sum(CPUEun)) %>%
  ungroup() %>%
  distinct(haul.id, LngtClass, .keep_all = TRUE) %>%
  dplyr::select(-X) # Drop the row-index column

# Check with an ID: rows of the example haul before summing
hlfle0 %>% filter(haul.id == test_id)
#>       X RecordType Survey Quarter Country Ship Gear SweepLngt GearEx DoorType
#> 1 98165         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 2 98166         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 3 98167         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 4 98168         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 5 98169         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 6 98170         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 7 98171         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 8  1873         HH   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#>   StNo HaulNo Year SpecCodeType SpecCode   SpecVal  Sex TotalNo CatIdentifier
#> 1   25      3 1995            W   127141         1    M       8             1
#> 2   25      3 1995            W   127141         1    M       8             1
#> 3   25      3 1995            W   127141         1    M       8             1
#> 4   25      3 1995            W   127141         1    M       8             1
#> 5   25      3 1995            W   127141         1    F       6             1
#> 6   25      3 1995            W   127141         1    F       6             1
#> 7   25      3 1995            W   127141         1    F       6             1
#> 8   25      3 1995         <NA>       NA zeroCatch <NA>      NA            NA
#>   NoMeas SubFactor SubWgt CatCatchWgt LngtCode LngtClass HLNoAtLngt DevStage
#> 1      7         1     NA          36        1        22          2     <NA>
#> 2      7         1     NA          36        1        24          2     <NA>
#> 3      7         1     NA          36        1        27          2     <NA>
#> 4      7         1     NA          36        1        28          2     <NA>
#> 5      7         1     NA          36        1        28          2     <NA>
#> 6      7         1     NA          36        1        29          2     <NA>
#> 7      7         1     NA          36        1        30          2     <NA>
#> 8     NA        NA     NA          NA     <NA>        NA         NA     <NA>
#>   LenMeasType DateofCalculation Valid_Aphia Ship2 Ship3
#> 1          NA          20190207      127141   SOL   SOL
#> 2          NA          20190207      127141   SOL   SOL
#> 3          NA          20190207      127141   SOL   SOL
#> 4          NA          20190207      127141   SOL   SOL
#> 5          NA          20190207      127141   SOL   SOL
#> 6          NA          20190207      127141   SOL   SOL
#> 7          NA          20190207      127141   SOL   SOL
#> 8          NA          20220223          NA   SOL   SOL
#>                        IDx sub_div Rect HaulVal StdSpecRecCode BySpecRecCode
#> 1 1995.1.GFR.06S1.H20.25.3      24 38G3       V              1             1
#> 2 1995.1.GFR.06S1.H20.25.3      24 38G3       V              1             1
#> 3 1995.1.GFR.06S1.H20.25.3      24 38G3       V              1             1
#> 4 1995.1.GFR.06S1.H20.25.3      24 38G3       V              1             1
#> 5 1995.1.GFR.06S1.H20.25.3      24 38G3       V              1             1
#> 6 1995.1.GFR.06S1.H20.25.3      24 38G3       V              1             1
#> 7 1995.1.GFR.06S1.H20.25.3      24 38G3       V              1             1
#> 8 1995.1.GFR.06S1.H20.25.3      24 38G3       V              1             1
#>   Fishing.line Month DataType HaulDur GroundSpeed                 haul.id
#> 1           36     2        C      30           4 1995:1:GFR:SOL:H20:25:3
#> 2           36     2        C      30           4 1995:1:GFR:SOL:H20:25:3
#> 3           36     2        C      30           4 1995:1:GFR:SOL:H20:25:3
#> 4           36     2        C      30           4 1995:1:GFR:SOL:H20:25:3
#> 5           36     2        C      30           4 1995:1:GFR:SOL:H20:25:3
#> 6           36     2        C      30           4 1995:1:GFR:SOL:H20:25:3
#> 7           36     2        C      30           4 1995:1:GFR:SOL:H20:25:3
#> 8           36     2        C      30           4 1995:1:GFR:SOL:H20:25:3
#>   ShootLat ShootLong     id_haul_stomach            Species CPUEun
#> 1  54.7167      13.1 1995.1.2.GFR.38G3.3 Platichthys flesus      2
#> 2  54.7167      13.1 1995.1.2.GFR.38G3.3 Platichthys flesus      2
#> 3  54.7167      13.1 1995.1.2.GFR.38G3.3 Platichthys flesus      2
#> 4  54.7167      13.1 1995.1.2.GFR.38G3.3 Platichthys flesus      2
#> 5  54.7167      13.1 1995.1.2.GFR.38G3.3 Platichthys flesus      2
#> 6  54.7167      13.1 1995.1.2.GFR.38G3.3 Platichthys flesus      2
#> 7  54.7167      13.1 1995.1.2.GFR.38G3.3 Platichthys flesus      2
#> 8  54.7167      13.1                <NA> Platichthys flesus      0
# Same haul after summing per length class
hlfleL %>%
  filter(haul.id == test_id) %>%
  as.data.frame()
#>   RecordType Survey Quarter Country Ship Gear SweepLngt GearEx DoorType StNo
#> 1         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 2         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 3         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 4         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 5         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 6         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 7         HH   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#>   HaulNo Year SpecCodeType SpecCode   SpecVal  Sex TotalNo CatIdentifier NoMeas
#> 1      3 1995            W   127141         1    M       8             1      7
#> 2      3 1995            W   127141         1    M       8             1      7
#> 3      3 1995            W   127141         1    M       8             1      7
#> 4      3 1995            W   127141         1    M       8             1      7
#> 5      3 1995            W   127141         1    F       6             1      7
#> 6      3 1995            W   127141         1    F       6             1      7
#> 7      3 1995         <NA>       NA zeroCatch <NA>      NA            NA     NA
#>   SubFactor SubWgt CatCatchWgt LngtCode LngtClass HLNoAtLngt DevStage
#> 1         1     NA          36        1        22          2     <NA>
#> 2         1     NA          36        1        24          2     <NA>
#> 3         1     NA          36        1        27          2     <NA>
#> 4         1     NA          36        1        28          2     <NA>
#> 5         1     NA          36        1        29          2     <NA>
#> 6         1     NA          36        1        30          2     <NA>
#> 7        NA     NA          NA     <NA>        NA         NA     <NA>
#>   LenMeasType DateofCalculation Valid_Aphia Ship2 Ship3
#> 1          NA          20190207      127141   SOL   SOL
#> 2          NA          20190207      127141   SOL   SOL
#> 3          NA          20190207      127141   SOL   SOL
#> 4          NA          20190207      127141   SOL   SOL
#> 5          NA          20190207      127141   SOL   SOL
#> 6          NA          20190207      127141   SOL   SOL
#> 7          NA          20220223          NA   SOL   SOL
#>                        IDx sub_div Rect HaulVal StdSpecRecCode BySpecRecCode
#> 1 1995.1.GFR.06S1.H20.25.3      24 38G3       V              1             1
#> 2 1995.1.GFR.06S1.H20.25.3      24 38G3       V              1             1
#> 3 1995.1.GFR.06S1.H20.25.3      24 38G3       V              1             1
#> 4 1995.1.GFR.06S1.H20.25.3      24 38G3       V              1             1
#> 5 1995.1.GFR.06S1.H20.25.3      24 38G3       V              1             1
#> 6 1995.1.GFR.06S1.H20.25.3      24 38G3       V              1             1
#> 7 1995.1.GFR.06S1.H20.25.3      24 38G3       V              1             1
#>   Fishing.line Month DataType HaulDur GroundSpeed                 haul.id
#> 1           36     2        C      30           4 1995:1:GFR:SOL:H20:25:3
#> 2           36     2        C      30           4 1995:1:GFR:SOL:H20:25:3
#> 3           36     2        C      30           4 1995:1:GFR:SOL:H20:25:3
#> 4           36     2        C      30           4 1995:1:GFR:SOL:H20:25:3
#> 5           36     2        C      30           4 1995:1:GFR:SOL:H20:25:3
#> 6           36     2        C      30           4 1995:1:GFR:SOL:H20:25:3
#> 7           36     2        C      30           4 1995:1:GFR:SOL:H20:25:3
#>   ShootLat ShootLong     id_haul_stomach            Species CPUEun
#> 1  54.7167      13.1 1995.1.2.GFR.38G3.3 Platichthys flesus      2
#> 2  54.7167      13.1 1995.1.2.GFR.38G3.3 Platichthys flesus      2
#> 3  54.7167      13.1 1995.1.2.GFR.38G3.3 Platichthys flesus      2
#> 4  54.7167      13.1 1995.1.2.GFR.38G3.3 Platichthys flesus      4
#> 5  54.7167      13.1 1995.1.2.GFR.38G3.3 Platichthys flesus      2
#> 6  54.7167      13.1 1995.1.2.GFR.38G3.3 Platichthys flesus      2
#> 7  54.7167      13.1                <NA> Platichthys flesus      0

# Sanity check on the zero catches: total CPUEun per haul, then list the hauls
# whose total is NOT zero. Hauls absent from this list have a zero (or NA)
# total.
hlfleL %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(CPUEun), .groups = "drop") %>%
  filter(CPUEun_haul != 0)
#> # A tibble: 10,013 × 3
#>    haul.id                     Year CPUEun_haul
#>    <chr>                      <int>       <dbl>
#>  1 1993:1:DEN:DAN2:GRT:105:45  1993        2500
#>  2 1993:1:DEN:DAN2:GRT:106:46  1993           8
#>  3 1993:1:DEN:DAN2:GRT:107:47  1993          26
#>  4 1993:1:DEN:DAN2:GRT:108:48  1993          34
#>  5 1993:1:DEN:DAN2:GRT:109:49  1993           8
#>  6 1993:1:DEN:DAN2:GRT:11:6    1993          11
#>  7 1993:1:DEN:DAN2:GRT:110:50  1993           8
#>  8 1993:1:DEN:DAN2:GRT:114:53  1993           1
#>  9 1993:1:DEN:DAN2:GRT:115:54  1993           3
#> 10 1993:1:DEN:DAN2:GRT:117:56  1993         729
#> # … with 10,003 more rows

# Number of hauls per year, stacked by whether the haul total is zero ("Y")
# or not ("N").
hlfleL %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(CPUEun), .groups = "drop") %>%
  mutate(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N")) %>%
  count(Year, zero_catch) %>%
  ggplot(aes(x = Year, y = n, fill = zero_catch)) +
  geom_col()



# Plaice
# Add CPUEun (numbers at length; presumably "unstandardised" CPUE - confirm)
# to every row. For valid species records (SpecVal == "1") the value depends on
# DataType:
#   "C": HLNoAtLngt is used as is (presumably already per hour - confirm
#        against the DATRAS documentation)
#   "R": HLNoAtLngt divided by haul duration in hours
#   "S": HLNoAtLngt raised by SubFactor, divided by haul duration in hours
# Rows flagged "zeroCatch" get 0; anything else becomes NA.
hlpla0 <- hlpla0 %>%
  mutate(
    CPUEun = ifelse(
      SpecVal == "1" & DataType == "C",
      HLNoAtLngt,
      ifelse(
        SpecVal == "1" & DataType == "R",
        HLNoAtLngt/(HaulDur/60),
        ifelse(
          SpecVal == "1" & DataType == "S",
          (HLNoAtLngt*SubFactor)/(HaulDur/60),
          ifelse(SpecVal == "zeroCatch", 0, NA)
        )
      )
    )
  )

# Total CPUEun per haul; list the hauls with a non-zero total (the plot filled
# by zero catch follows below)
hlpla0 %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(CPUEun), .groups = "drop") %>%
  filter(CPUEun_haul != 0)
#> # A tibble: 5,568 × 3
#>    haul.id                     Year CPUEun_haul
#>    <chr>                      <int>       <dbl>
#>  1 1993:1:DEN:DAN2:GRT:11:6    1993           3
#>  2 1993:1:DEN:DAN2:GRT:13:7    1993           1
#>  3 1993:1:DEN:DAN2:GRT:146:68  1993           1
#>  4 1993:1:DEN:DAN2:GRT:160:70  1993           2
#>  5 1993:1:DEN:DAN2:GRT:161:71  1993           2
#>  6 1993:1:DEN:DAN2:GRT:17:9    1993           1
#>  7 1993:1:DEN:DAN2:GRT:23:12   1993           1
#>  8 1993:1:DEN:DAN2:GRT:45:21   1993           2
#>  9 1993:1:DEN:DAN2:GRT:48:22   1993           1
#> 10 1993:1:DEN:DAN2:GRT:55:26   1993           3
#> # … with 5,558 more rows

# Number of hauls per year, stacked by whether the haul total is zero ("Y")
# or not ("N").
hlpla0 %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(CPUEun), .groups = "drop") %>%
  mutate(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N")) %>%
  count(Year, zero_catch) %>%
  ggplot(aes(x = Year, y = n, fill = zero_catch)) +
  geom_col()


# Some combinations of length class and haul id occur on more than one row
# (I suppose often because the record is split by sex), so CPUEun has to be
# summed within each combination. First: which row counts per combination
# occur at all?
hlpla0 %>%
  group_by(LngtClass, haul.id) %>%
  mutate(n = n()) %>%
  ungroup() %>%
  distinct(n)
#> # A tibble: 2 × 1
#>       n
#>   <int>
#> 1     1
#> 2     2
# Preview the first 20 rows that belong to a length class x haul combination
# with exactly two rows
hlpla0 %>%
  group_by(LngtClass, haul.id) %>%
  mutate(n = n()) %>%
  filter(n == 2) %>%
  ungroup() %>%
  as.data.frame() %>%
  head(20)
#>         X RecordType Survey Quarter Country Ship Gear SweepLngt GearEx DoorType
#> 1   99640         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 2   99642         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 3  100042         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 4  100044         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 5  100318         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 6  100320         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 7  100322         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 8  100324         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 9  100325         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 10 100326         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 11 100327         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 12 100329         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 13 100543         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 14 100545         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 15 100673         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 16 100674         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 17 100675         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 18 100676         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 19 100677         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 20 100678         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#>    StNo HaulNo Year SpecCodeType SpecCode SpecVal Sex TotalNo CatIdentifier
#> 1    39     49 1995            W   127143       1   M       8             1
#> 2    39     49 1995            W   127143       1   F      18             1
#> 3    42     35 1995            W   127143       1   M       2             1
#> 4    42     35 1995            W   127143       1   F       6             1
#> 5    44     39 1995            W   127143       1   M     112             1
#> 6    44     39 1995            W   127143       1   M     112             1
#> 7    44     39 1995            W   127143       1   M     112             1
#> 8    44     39 1995            W   127143       1   M     112             1
#> 9    44     39 1995            W   127143       1   F      24             1
#> 10   44     39 1995            W   127143       1   F      24             1
#> 11   44     39 1995            W   127143       1   F      24             1
#> 12   44     39 1995            W   127143       1   F      24             1
#> 13   45     45 1995            W   127143       1   M     147             1
#> 14   45     45 1995            W   127143       1   F      18             1
#> 15   46     59 1995            W   127143       1   M      72             1
#> 16   46     59 1995            W   127143       1   M      72             1
#> 17   46     59 1995            W   127143       1   M      72             1
#> 18   46     59 1995            W   127143       1   M      72             1
#> 19   46     59 1995            W   127143       1   M      72             1
#> 20   46     59 1995            W   127143       1   M      72             1
#>    NoMeas SubFactor SubWgt CatCatchWgt LngtCode LngtClass HLNoAtLngt DevStage
#> 1      13         1     NA          61        1        28          2     <NA>
#> 2      13         1     NA          61        1        28          4     <NA>
#> 3       4         1     NA          18        1        27          2     <NA>
#> 4       4         1     NA          18        1        27          2     <NA>
#> 5      68         1     NA         312        1        26         12     <NA>
#> 6      68         1     NA         312        1        28         12     <NA>
#> 7      68         1     NA         312        1        30          6     <NA>
#> 8      68         1     NA         312        1        34          2     <NA>
#> 9      68         1     NA         312        1        26          2     <NA>
#> 10     68         1     NA         312        1        28          4     <NA>
#> 11     68         1     NA         312        1        30          4     <NA>
#> 12     68         1     NA         312        1        34          6     <NA>
#> 13     17         1     NA         317        1        30          9     <NA>
#> 14     17         1     NA         317        1        30          9     <NA>
#> 15     50         1     NA         208        1        26         14     <NA>
#> 16     50         1     NA         208        1        27         16     <NA>
#> 17     50         1     NA         208        1        28          8     <NA>
#> 18     50         1     NA         208        1        29          4     <NA>
#> 19     50         1     NA         208        1        30          4     <NA>
#> 20     50         1     NA         208        1        31          2     <NA>
#>    LenMeasType DateofCalculation Valid_Aphia Ship2 Ship3
#> 1           NA          20190207      127143   SOL   SOL
#> 2           NA          20190207      127143   SOL   SOL
#> 3           NA          20190207      127143   SOL   SOL
#> 4           NA          20190207      127143   SOL   SOL
#> 5           NA          20190207      127143   SOL   SOL
#> 6           NA          20190207      127143   SOL   SOL
#> 7           NA          20190207      127143   SOL   SOL
#> 8           NA          20190207      127143   SOL   SOL
#> 9           NA          20190207      127143   SOL   SOL
#> 10          NA          20190207      127143   SOL   SOL
#> 11          NA          20190207      127143   SOL   SOL
#> 12          NA          20190207      127143   SOL   SOL
#> 13          NA          20190207      127143   SOL   SOL
#> 14          NA          20190207      127143   SOL   SOL
#> 15          NA          20190207      127143   SOL   SOL
#> 16          NA          20190207      127143   SOL   SOL
#> 17          NA          20190207      127143   SOL   SOL
#> 18          NA          20190207      127143   SOL   SOL
#> 19          NA          20190207      127143   SOL   SOL
#> 20          NA          20190207      127143   SOL   SOL
#>                          IDx sub_div Rect HaulVal StdSpecRecCode BySpecRecCode
#> 1  1995.1.GFR.06S1.H20.39.49      24 38G3       V              1             1
#> 2  1995.1.GFR.06S1.H20.39.49      24 38G3       V              1             1
#> 3  1995.1.GFR.06S1.H20.42.35      24 38G3       V              3             1
#> 4  1995.1.GFR.06S1.H20.42.35      24 38G3       V              3             1
#> 5  1995.1.GFR.06S1.H20.44.39      24 39G3       V              1             1
#> 6  1995.1.GFR.06S1.H20.44.39      24 39G3       V              1             1
#> 7  1995.1.GFR.06S1.H20.44.39      24 39G3       V              1             1
#> 8  1995.1.GFR.06S1.H20.44.39      24 39G3       V              1             1
#> 9  1995.1.GFR.06S1.H20.44.39      24 39G3       V              1             1
#> 10 1995.1.GFR.06S1.H20.44.39      24 39G3       V              1             1
#> 11 1995.1.GFR.06S1.H20.44.39      24 39G3       V              1             1
#> 12 1995.1.GFR.06S1.H20.44.39      24 39G3       V              1             1
#> 13 1995.1.GFR.06S1.H20.45.45      24 39G3       V              1             1
#> 14 1995.1.GFR.06S1.H20.45.45      24 39G3       V              1             1
#> 15 1995.1.GFR.06S1.H20.46.59      24 39G4       V              1             1
#> 16 1995.1.GFR.06S1.H20.46.59      24 39G4       V              1             1
#> 17 1995.1.GFR.06S1.H20.46.59      24 39G4       V              1             1
#> 18 1995.1.GFR.06S1.H20.46.59      24 39G4       V              1             1
#> 19 1995.1.GFR.06S1.H20.46.59      24 39G4       V              1             1
#> 20 1995.1.GFR.06S1.H20.46.59      24 39G4       V              1             1
#>    Fishing.line Month DataType HaulDur GroundSpeed                  haul.id
#> 1            36     2        C      30         3.4 1995:1:GFR:SOL:H20:39:49
#> 2            36     2        C      30         3.4 1995:1:GFR:SOL:H20:39:49
#> 3            36     2        C      30         3.6 1995:1:GFR:SOL:H20:42:35
#> 4            36     2        C      30         3.6 1995:1:GFR:SOL:H20:42:35
#> 5            36     2        C      30         4.2 1995:1:GFR:SOL:H20:44:39
#> 6            36     2        C      30         4.2 1995:1:GFR:SOL:H20:44:39
#> 7            36     2        C      30         4.2 1995:1:GFR:SOL:H20:44:39
#> 8            36     2        C      30         4.2 1995:1:GFR:SOL:H20:44:39
#> 9            36     2        C      30         4.2 1995:1:GFR:SOL:H20:44:39
#> 10           36     2        C      30         4.2 1995:1:GFR:SOL:H20:44:39
#> 11           36     2        C      30         4.2 1995:1:GFR:SOL:H20:44:39
#> 12           36     2        C      30         4.2 1995:1:GFR:SOL:H20:44:39
#> 13           36     2        C      31         3.8 1995:1:GFR:SOL:H20:45:45
#> 14           36     2        C      31         3.8 1995:1:GFR:SOL:H20:45:45
#> 15           36     2        C      30         3.8 1995:1:GFR:SOL:H20:46:59
#> 16           36     2        C      30         3.8 1995:1:GFR:SOL:H20:46:59
#> 17           36     2        C      30         3.8 1995:1:GFR:SOL:H20:46:59
#> 18           36     2        C      30         3.8 1995:1:GFR:SOL:H20:46:59
#> 19           36     2        C      30         3.8 1995:1:GFR:SOL:H20:46:59
#> 20           36     2        C      30         3.8 1995:1:GFR:SOL:H20:46:59
#>    ShootLat ShootLong      id_haul_stomach               Species CPUEun n
#> 1   54.9667     13.85 1995.1.2.GFR.38G3.49 Pleuronectes platessa      2 2
#> 2   54.9667     13.85 1995.1.2.GFR.38G3.49 Pleuronectes platessa      4 2
#> 3   54.9167     13.10 1995.1.2.GFR.38G3.35 Pleuronectes platessa      2 2
#> 4   54.9167     13.10 1995.1.2.GFR.38G3.35 Pleuronectes platessa      2 2
#> 5   55.0333     13.40 1995.1.2.GFR.39G3.39 Pleuronectes platessa     12 2
#> 6   55.0333     13.40 1995.1.2.GFR.39G3.39 Pleuronectes platessa     12 2
#> 7   55.0333     13.40 1995.1.2.GFR.39G3.39 Pleuronectes platessa      6 2
#> 8   55.0333     13.40 1995.1.2.GFR.39G3.39 Pleuronectes platessa      2 2
#> 9   55.0333     13.40 1995.1.2.GFR.39G3.39 Pleuronectes platessa      2 2
#> 10  55.0333     13.40 1995.1.2.GFR.39G3.39 Pleuronectes platessa      4 2
#> 11  55.0333     13.40 1995.1.2.GFR.39G3.39 Pleuronectes platessa      4 2
#> 12  55.0333     13.40 1995.1.2.GFR.39G3.39 Pleuronectes platessa      6 2
#> 13  55.0667     13.75 1995.1.2.GFR.39G3.45 Pleuronectes platessa      9 2
#> 14  55.0667     13.75 1995.1.2.GFR.39G3.45 Pleuronectes platessa      9 2
#> 15  55.1500     14.20 1995.1.2.GFR.39G4.59 Pleuronectes platessa     14 2
#> 16  55.1500     14.20 1995.1.2.GFR.39G4.59 Pleuronectes platessa     16 2
#> 17  55.1500     14.20 1995.1.2.GFR.39G4.59 Pleuronectes platessa      8 2
#> 18  55.1500     14.20 1995.1.2.GFR.39G4.59 Pleuronectes platessa      4 2
#> 19  55.1500     14.20 1995.1.2.GFR.39G4.59 Pleuronectes platessa      4 2
#> 20  55.1500     14.20 1995.1.2.GFR.39G4.59 Pleuronectes platessa      2 2
# Keep the duplicated (two-row) combinations and pick the haul id of the second
# such row as a test case for the aggregation below
test <- hlpla0 %>%
  group_by(LngtClass, haul.id) %>%
  mutate(n = n()) %>%
  filter(n == 2) %>%
  ungroup()
test_id <- test[["haul.id"]][2]

# Collapse to one row per haul id x length class: CPUEun becomes the sum over
# all rows of the combination, and only the first row of each combination is
# kept. NOTE: every other column (e.g. Sex, TotalNo, HLNoAtLngt) therefore
# still holds the values of that first row, not an aggregate.
hlplaL <- hlpla0 %>%
  group_by(LngtClass, haul.id) %>%
  mutate(CPUEun = sum(CPUEun)) %>%
  ungroup() %>%
  distinct(haul.id, LngtClass, .keep_all = TRUE) %>%
  dplyr::select(-X) # Clean up a bit

# Check with the test haul: rows before aggregation ...
hlpla0 %>% filter(haul.id == test_id)
#>        X RecordType Survey Quarter Country Ship Gear SweepLngt GearEx DoorType
#> 1  99637         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 2  99638         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 3  99639         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 4  99640         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 5  99641         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 6  99642         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 7  99643         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 8  99644         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 9  99645         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 10  1887         HH   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#>    StNo HaulNo Year SpecCodeType SpecCode   SpecVal  Sex TotalNo CatIdentifier
#> 1    39     49 1995            W   127143         1    M       8             1
#> 2    39     49 1995            W   127143         1    M       8             1
#> 3    39     49 1995            W   127143         1    M       8             1
#> 4    39     49 1995            W   127143         1    M       8             1
#> 5    39     49 1995            W   127143         1    F      18             1
#> 6    39     49 1995            W   127143         1    F      18             1
#> 7    39     49 1995            W   127143         1    F      18             1
#> 8    39     49 1995            W   127143         1    F      18             1
#> 9    39     49 1995            W   127143         1    F      18             1
#> 10   39     49 1995         <NA>       NA zeroCatch <NA>      NA            NA
#>    NoMeas SubFactor SubWgt CatCatchWgt LngtCode LngtClass HLNoAtLngt DevStage
#> 1      13         1     NA          61        1        23          2     <NA>
#> 2      13         1     NA          61        1        25          2     <NA>
#> 3      13         1     NA          61        1        26          2     <NA>
#> 4      13         1     NA          61        1        28          2     <NA>
#> 5      13         1     NA          61        1        27          2     <NA>
#> 6      13         1     NA          61        1        28          4     <NA>
#> 7      13         1     NA          61        1        31          6     <NA>
#> 8      13         1     NA          61        1        32          2     <NA>
#> 9      13         1     NA          61        1        33          4     <NA>
#> 10     NA        NA     NA          NA     <NA>        NA         NA     <NA>
#>    LenMeasType DateofCalculation Valid_Aphia Ship2 Ship3
#> 1           NA          20190207      127143   SOL   SOL
#> 2           NA          20190207      127143   SOL   SOL
#> 3           NA          20190207      127143   SOL   SOL
#> 4           NA          20190207      127143   SOL   SOL
#> 5           NA          20190207      127143   SOL   SOL
#> 6           NA          20190207      127143   SOL   SOL
#> 7           NA          20190207      127143   SOL   SOL
#> 8           NA          20190207      127143   SOL   SOL
#> 9           NA          20190207      127143   SOL   SOL
#> 10          NA          20220223          NA   SOL   SOL
#>                          IDx sub_div Rect HaulVal StdSpecRecCode BySpecRecCode
#> 1  1995.1.GFR.06S1.H20.39.49      24 38G3       V              1             1
#> 2  1995.1.GFR.06S1.H20.39.49      24 38G3       V              1             1
#> 3  1995.1.GFR.06S1.H20.39.49      24 38G3       V              1             1
#> 4  1995.1.GFR.06S1.H20.39.49      24 38G3       V              1             1
#> 5  1995.1.GFR.06S1.H20.39.49      24 38G3       V              1             1
#> 6  1995.1.GFR.06S1.H20.39.49      24 38G3       V              1             1
#> 7  1995.1.GFR.06S1.H20.39.49      24 38G3       V              1             1
#> 8  1995.1.GFR.06S1.H20.39.49      24 38G3       V              1             1
#> 9  1995.1.GFR.06S1.H20.39.49      24 38G3       V              1             1
#> 10 1995.1.GFR.06S1.H20.39.49      24 38G3       V              1             1
#>    Fishing.line Month DataType HaulDur GroundSpeed                  haul.id
#> 1            36     2        C      30         3.4 1995:1:GFR:SOL:H20:39:49
#> 2            36     2        C      30         3.4 1995:1:GFR:SOL:H20:39:49
#> 3            36     2        C      30         3.4 1995:1:GFR:SOL:H20:39:49
#> 4            36     2        C      30         3.4 1995:1:GFR:SOL:H20:39:49
#> 5            36     2        C      30         3.4 1995:1:GFR:SOL:H20:39:49
#> 6            36     2        C      30         3.4 1995:1:GFR:SOL:H20:39:49
#> 7            36     2        C      30         3.4 1995:1:GFR:SOL:H20:39:49
#> 8            36     2        C      30         3.4 1995:1:GFR:SOL:H20:39:49
#> 9            36     2        C      30         3.4 1995:1:GFR:SOL:H20:39:49
#> 10           36     2        C      30         3.4 1995:1:GFR:SOL:H20:39:49
#>    ShootLat ShootLong      id_haul_stomach               Species CPUEun
#> 1   54.9667     13.85 1995.1.2.GFR.38G3.49 Pleuronectes platessa      2
#> 2   54.9667     13.85 1995.1.2.GFR.38G3.49 Pleuronectes platessa      2
#> 3   54.9667     13.85 1995.1.2.GFR.38G3.49 Pleuronectes platessa      2
#> 4   54.9667     13.85 1995.1.2.GFR.38G3.49 Pleuronectes platessa      2
#> 5   54.9667     13.85 1995.1.2.GFR.38G3.49 Pleuronectes platessa      2
#> 6   54.9667     13.85 1995.1.2.GFR.38G3.49 Pleuronectes platessa      4
#> 7   54.9667     13.85 1995.1.2.GFR.38G3.49 Pleuronectes platessa      6
#> 8   54.9667     13.85 1995.1.2.GFR.38G3.49 Pleuronectes platessa      2
#> 9   54.9667     13.85 1995.1.2.GFR.38G3.49 Pleuronectes platessa      4
#> 10  54.9667     13.85                 <NA> Pleuronectes platessa      0
# ... and the same haul after aggregation (one row per length class)
hlplaL %>%
  filter(haul.id == test_id) %>%
  as.data.frame()
#>   RecordType Survey Quarter Country Ship Gear SweepLngt GearEx DoorType StNo
#> 1         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   39
#> 2         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   39
#> 3         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   39
#> 4         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   39
#> 5         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   39
#> 6         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   39
#> 7         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   39
#> 8         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   39
#> 9         HH   BITS       1     GFR 06S1  H20        NA   <NA>       NA   39
#>   HaulNo Year SpecCodeType SpecCode   SpecVal  Sex TotalNo CatIdentifier NoMeas
#> 1     49 1995            W   127143         1    M       8             1     13
#> 2     49 1995            W   127143         1    M       8             1     13
#> 3     49 1995            W   127143         1    M       8             1     13
#> 4     49 1995            W   127143         1    M       8             1     13
#> 5     49 1995            W   127143         1    F      18             1     13
#> 6     49 1995            W   127143         1    F      18             1     13
#> 7     49 1995            W   127143         1    F      18             1     13
#> 8     49 1995            W   127143         1    F      18             1     13
#> 9     49 1995         <NA>       NA zeroCatch <NA>      NA            NA     NA
#>   SubFactor SubWgt CatCatchWgt LngtCode LngtClass HLNoAtLngt DevStage
#> 1         1     NA          61        1        23          2     <NA>
#> 2         1     NA          61        1        25          2     <NA>
#> 3         1     NA          61        1        26          2     <NA>
#> 4         1     NA          61        1        28          2     <NA>
#> 5         1     NA          61        1        27          2     <NA>
#> 6         1     NA          61        1        31          6     <NA>
#> 7         1     NA          61        1        32          2     <NA>
#> 8         1     NA          61        1        33          4     <NA>
#> 9        NA     NA          NA     <NA>        NA         NA     <NA>
#>   LenMeasType DateofCalculation Valid_Aphia Ship2 Ship3
#> 1          NA          20190207      127143   SOL   SOL
#> 2          NA          20190207      127143   SOL   SOL
#> 3          NA          20190207      127143   SOL   SOL
#> 4          NA          20190207      127143   SOL   SOL
#> 5          NA          20190207      127143   SOL   SOL
#> 6          NA          20190207      127143   SOL   SOL
#> 7          NA          20190207      127143   SOL   SOL
#> 8          NA          20190207      127143   SOL   SOL
#> 9          NA          20220223          NA   SOL   SOL
#>                         IDx sub_div Rect HaulVal StdSpecRecCode BySpecRecCode
#> 1 1995.1.GFR.06S1.H20.39.49      24 38G3       V              1             1
#> 2 1995.1.GFR.06S1.H20.39.49      24 38G3       V              1             1
#> 3 1995.1.GFR.06S1.H20.39.49      24 38G3       V              1             1
#> 4 1995.1.GFR.06S1.H20.39.49      24 38G3       V              1             1
#> 5 1995.1.GFR.06S1.H20.39.49      24 38G3       V              1             1
#> 6 1995.1.GFR.06S1.H20.39.49      24 38G3       V              1             1
#> 7 1995.1.GFR.06S1.H20.39.49      24 38G3       V              1             1
#> 8 1995.1.GFR.06S1.H20.39.49      24 38G3       V              1             1
#> 9 1995.1.GFR.06S1.H20.39.49      24 38G3       V              1             1
#>   Fishing.line Month DataType HaulDur GroundSpeed                  haul.id
#> 1           36     2        C      30         3.4 1995:1:GFR:SOL:H20:39:49
#> 2           36     2        C      30         3.4 1995:1:GFR:SOL:H20:39:49
#> 3           36     2        C      30         3.4 1995:1:GFR:SOL:H20:39:49
#> 4           36     2        C      30         3.4 1995:1:GFR:SOL:H20:39:49
#> 5           36     2        C      30         3.4 1995:1:GFR:SOL:H20:39:49
#> 6           36     2        C      30         3.4 1995:1:GFR:SOL:H20:39:49
#> 7           36     2        C      30         3.4 1995:1:GFR:SOL:H20:39:49
#> 8           36     2        C      30         3.4 1995:1:GFR:SOL:H20:39:49
#> 9           36     2        C      30         3.4 1995:1:GFR:SOL:H20:39:49
#>   ShootLat ShootLong      id_haul_stomach               Species CPUEun
#> 1  54.9667     13.85 1995.1.2.GFR.38G3.49 Pleuronectes platessa      2
#> 2  54.9667     13.85 1995.1.2.GFR.38G3.49 Pleuronectes platessa      2
#> 3  54.9667     13.85 1995.1.2.GFR.38G3.49 Pleuronectes platessa      2
#> 4  54.9667     13.85 1995.1.2.GFR.38G3.49 Pleuronectes platessa      6
#> 5  54.9667     13.85 1995.1.2.GFR.38G3.49 Pleuronectes platessa      2
#> 6  54.9667     13.85 1995.1.2.GFR.38G3.49 Pleuronectes platessa      6
#> 7  54.9667     13.85 1995.1.2.GFR.38G3.49 Pleuronectes platessa      2
#> 8  54.9667     13.85 1995.1.2.GFR.38G3.49 Pleuronectes platessa      4
#> 9  54.9667     13.85                 <NA> Pleuronectes platessa      0

# Did the aggregation preserve the zero catches? Total CPUEun per haul, then
# list the hauls whose total is NOT zero; the row count should match the same
# check on hlpla0 above.
hlplaL %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(CPUEun), .groups = "drop") %>%
  filter(CPUEun_haul != 0)
#> # A tibble: 5,568 × 3
#>    haul.id                     Year CPUEun_haul
#>    <chr>                      <int>       <dbl>
#>  1 1993:1:DEN:DAN2:GRT:11:6    1993           3
#>  2 1993:1:DEN:DAN2:GRT:13:7    1993           1
#>  3 1993:1:DEN:DAN2:GRT:146:68  1993           1
#>  4 1993:1:DEN:DAN2:GRT:160:70  1993           2
#>  5 1993:1:DEN:DAN2:GRT:161:71  1993           2
#>  6 1993:1:DEN:DAN2:GRT:17:9    1993           1
#>  7 1993:1:DEN:DAN2:GRT:23:12   1993           1
#>  8 1993:1:DEN:DAN2:GRT:45:21   1993           2
#>  9 1993:1:DEN:DAN2:GRT:48:22   1993           1
#> 10 1993:1:DEN:DAN2:GRT:55:26   1993           3
#> # … with 5,558 more rows

# Number of hauls per year, stacked by whether the haul total is zero ("Y")
# or not ("N").
hlplaL %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(CPUEun), .groups = "drop") %>%
  mutate(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N")) %>%
  count(Year, zero_catch) %>%
  ggplot(aes(x = Year, y = n, fill = zero_catch)) +
  geom_col()



# Dab
# Add CPUEun (numbers at length; presumably "unstandardised" CPUE - confirm)
# to every row. For valid species records (SpecVal == "1") the value depends on
# DataType:
#   "C": HLNoAtLngt is used as is (presumably already per hour - confirm
#        against the DATRAS documentation)
#   "R": HLNoAtLngt divided by haul duration in hours
#   "S": HLNoAtLngt raised by SubFactor, divided by haul duration in hours
# Rows flagged "zeroCatch" get 0; anything else becomes NA.
hldab0 <- hldab0 %>%
  mutate(
    CPUEun = ifelse(
      SpecVal == "1" & DataType == "C",
      HLNoAtLngt,
      ifelse(
        SpecVal == "1" & DataType == "R",
        HLNoAtLngt/(HaulDur/60),
        ifelse(
          SpecVal == "1" & DataType == "S",
          (HLNoAtLngt*SubFactor)/(HaulDur/60),
          ifelse(SpecVal == "zeroCatch", 0, NA)
        )
      )
    )
  )

# Total CPUEun per haul; list the hauls with a non-zero total (the plot filled
# by zero catch follows below)
hldab0 %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(CPUEun), .groups = "drop") %>%
  filter(CPUEun_haul != 0)
#> # A tibble: 1,930 × 3
#>    haul.id                     Year CPUEun_haul
#>    <chr>                      <int>       <dbl>
#>  1 1993:1:DEN:DAN2:GRT:1:1     1993           2
#>  2 1993:1:DEN:DAN2:GRT:11:6    1993           5
#>  3 1993:1:DEN:DAN2:GRT:13:7    1993           1
#>  4 1993:1:DEN:DAN2:GRT:14:8    1993           1
#>  5 1993:1:DEN:DAN2:GRT:161:71  1993           1
#>  6 1993:1:DEN:DAN2:GRT:162:72  1993           1
#>  7 1993:1:DEN:DAN2:GRT:17:9    1993           1
#>  8 1993:1:DEN:DAN2:GRT:3:2     1993           4
#>  9 1993:1:DEN:DAN2:GRT:4:3     1993           3
#> 10 1993:1:DEN:DAN2:GRT:52:24   1993           1
#> # … with 1,920 more rows

# Number of hauls per year, stacked by whether the haul total is zero ("Y")
# or not ("N").
hldab0 %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(CPUEun), .groups = "drop") %>%
  mutate(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N")) %>%
  count(Year, zero_catch) %>%
  ggplot(aes(x = Year, y = n, fill = zero_catch)) +
  geom_col()


# Some combinations of length class and haul id occur on more than one row
# (I suppose often because the record is split by sex), so CPUEun has to be
# summed within each combination. First: which row counts per combination
# occur at all?
hldab0 %>%
  group_by(LngtClass, haul.id) %>%
  mutate(n = n()) %>%
  ungroup() %>%
  distinct(n)
#> # A tibble: 3 × 1
#>       n
#>   <int>
#> 1     1
#> 2     2
#> 3     3
# Preview the first 20 rows that belong to a length class x haul combination
# with exactly two rows (combinations with three rows are not shown here)
hldab0 %>%
  group_by(LngtClass, haul.id) %>%
  mutate(n = n()) %>%
  filter(n == 2) %>%
  ungroup() %>%
  as.data.frame() %>%
  head(20)
#>         X RecordType Survey Quarter Country Ship Gear SweepLngt GearEx DoorType
#> 1   98413         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 2   98414         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 3   98415         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 4   98416         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 5   98631         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 6   98632         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 7   98633         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 8   98637         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 9   98638         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 10  98639         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 11  98726         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 12  98727         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 13  98728         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 14  98730         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 15  98731         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 16  98733         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 17 100025         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 18 100026         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 19 100027         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 20 100030         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#>    StNo HaulNo Year SpecCodeType SpecCode SpecVal Sex TotalNo CatIdentifier
#> 1    27      6 1995            W   127139       1   M      10             1
#> 2    27      6 1995            W   127139       1   M      10             1
#> 3    27      6 1995            W   127139       1   F       4             1
#> 4    27      6 1995            W   127139       1   F       4             1
#> 5    32     31 1995            W   127139       1   M      24             1
#> 6    32     31 1995            W   127139       1   M      24             1
#> 7    32     31 1995            W   127139       1   M      24             1
#> 8    32     31 1995            W   127139       1   F      12             1
#> 9    32     31 1995            W   127139       1   F      12             1
#> 10   32     31 1995            W   127139       1   F      12             1
#> 11   33     29 1995            W   127139       1   M      20             1
#> 12   33     29 1995            W   127139       1   M      20             1
#> 13   33     29 1995            W   127139       1   M      20             1
#> 14   33     29 1995            W   127139       1   F      20             1
#> 15   33     29 1995            W   127139       1   F      20             1
#> 16   33     29 1995            W   127139       1   F      20             1
#> 17   42     35 1995            W   127139       1   M      38             1
#> 18   42     35 1995            W   127139       1   M      38             1
#> 19   42     35 1995            W   127139       1   M      38             1
#> 20   42     35 1995            W   127139       1   M      38             1
#>    NoMeas SubFactor SubWgt CatCatchWgt LngtCode LngtClass HLNoAtLngt DevStage
#> 1       7         1     NA           7        1        16          2     <NA>
#> 2       7         1     NA           7        1        17          4     <NA>
#> 3       7         1     NA           7        1        16          2     <NA>
#> 4       7         1     NA           7        1        17          2     <NA>
#> 5      18         1     NA          24        1        16          4     <NA>
#> 6      18         1     NA          24        1        17          6     <NA>
#> 7      18         1     NA          24        1        18          2     <NA>
#> 8      18         1     NA          24        1        16          2     <NA>
#> 9      18         1     NA          24        1        17          6     <NA>
#> 10     18         1     NA          24        1        18          4     <NA>
#> 11     20         1     NA          43        1        17          4     <NA>
#> 12     20         1     NA          43        1        18          2     <NA>
#> 13     20         1     NA          43        1        20          4     <NA>
#> 14     20         1     NA          43        1        17          4     <NA>
#> 15     20         1     NA          43        1        18          4     <NA>
#> 16     20         1     NA          43        1        20          4     <NA>
#> 17     29         1     NA          82        1        18          4     <NA>
#> 18     29         1     NA          82        1        19          2     <NA>
#> 19     29         1     NA          82        1        20          2     <NA>
#> 20     29         1     NA          82        1        25          6     <NA>
#>    LenMeasType DateofCalculation Valid_Aphia Ship2 Ship3
#> 1           NA          20190207      127139   SOL   SOL
#> 2           NA          20190207      127139   SOL   SOL
#> 3           NA          20190207      127139   SOL   SOL
#> 4           NA          20190207      127139   SOL   SOL
#> 5           NA          20190207      127139   SOL   SOL
#> 6           NA          20190207      127139   SOL   SOL
#> 7           NA          20190207      127139   SOL   SOL
#> 8           NA          20190207      127139   SOL   SOL
#> 9           NA          20190207      127139   SOL   SOL
#> 10          NA          20190207      127139   SOL   SOL
#> 11          NA          20190207      127139   SOL   SOL
#> 12          NA          20190207      127139   SOL   SOL
#> 13          NA          20190207      127139   SOL   SOL
#> 14          NA          20190207      127139   SOL   SOL
#> 15          NA          20190207      127139   SOL   SOL
#> 16          NA          20190207      127139   SOL   SOL
#> 17          NA          20190207      127139   SOL   SOL
#> 18          NA          20190207      127139   SOL   SOL
#> 19          NA          20190207      127139   SOL   SOL
#> 20          NA          20190207      127139   SOL   SOL
#>                          IDx sub_div Rect HaulVal StdSpecRecCode BySpecRecCode
#> 1   1995.1.GFR.06S1.H20.27.6      24 38G3       V              1             1
#> 2   1995.1.GFR.06S1.H20.27.6      24 38G3       V              1             1
#> 3   1995.1.GFR.06S1.H20.27.6      24 38G3       V              1             1
#> 4   1995.1.GFR.06S1.H20.27.6      24 38G3       V              1             1
#> 5  1995.1.GFR.06S1.H20.32.31      24 38G3       V              1             1
#> 6  1995.1.GFR.06S1.H20.32.31      24 38G3       V              1             1
#> 7  1995.1.GFR.06S1.H20.32.31      24 38G3       V              1             1
#> 8  1995.1.GFR.06S1.H20.32.31      24 38G3       V              1             1
#> 9  1995.1.GFR.06S1.H20.32.31      24 38G3       V              1             1
#> 10 1995.1.GFR.06S1.H20.32.31      24 38G3       V              1             1
#> 11 1995.1.GFR.06S1.H20.33.29      24 38G3       V              1             1
#> 12 1995.1.GFR.06S1.H20.33.29      24 38G3       V              1             1
#> 13 1995.1.GFR.06S1.H20.33.29      24 38G3       V              1             1
#> 14 1995.1.GFR.06S1.H20.33.29      24 38G3       V              1             1
#> 15 1995.1.GFR.06S1.H20.33.29      24 38G3       V              1             1
#> 16 1995.1.GFR.06S1.H20.33.29      24 38G3       V              1             1
#> 17 1995.1.GFR.06S1.H20.42.35      24 38G3       V              3             1
#> 18 1995.1.GFR.06S1.H20.42.35      24 38G3       V              3             1
#> 19 1995.1.GFR.06S1.H20.42.35      24 38G3       V              3             1
#> 20 1995.1.GFR.06S1.H20.42.35      24 38G3       V              3             1
#>    Fishing.line Month DataType HaulDur GroundSpeed                  haul.id
#> 1            36     2        C      30         3.6  1995:1:GFR:SOL:H20:27:6
#> 2            36     2        C      30         3.6  1995:1:GFR:SOL:H20:27:6
#> 3            36     2        C      30         3.6  1995:1:GFR:SOL:H20:27:6
#> 4            36     2        C      30         3.6  1995:1:GFR:SOL:H20:27:6
#> 5            36     2        C      30         3.6 1995:1:GFR:SOL:H20:32:31
#> 6            36     2        C      30         3.6 1995:1:GFR:SOL:H20:32:31
#> 7            36     2        C      30         3.6 1995:1:GFR:SOL:H20:32:31
#> 8            36     2        C      30         3.6 1995:1:GFR:SOL:H20:32:31
#> 9            36     2        C      30         3.6 1995:1:GFR:SOL:H20:32:31
#> 10           36     2        C      30         3.6 1995:1:GFR:SOL:H20:32:31
#> 11           36     2        C      31         3.6 1995:1:GFR:SOL:H20:33:29
#> 12           36     2        C      31         3.6 1995:1:GFR:SOL:H20:33:29
#> 13           36     2        C      31         3.6 1995:1:GFR:SOL:H20:33:29
#> 14           36     2        C      31         3.6 1995:1:GFR:SOL:H20:33:29
#> 15           36     2        C      31         3.6 1995:1:GFR:SOL:H20:33:29
#> 16           36     2        C      31         3.6 1995:1:GFR:SOL:H20:33:29
#> 17           36     2        C      30         3.6 1995:1:GFR:SOL:H20:42:35
#> 18           36     2        C      30         3.6 1995:1:GFR:SOL:H20:42:35
#> 19           36     2        C      30         3.6 1995:1:GFR:SOL:H20:42:35
#> 20           36     2        C      30         3.6 1995:1:GFR:SOL:H20:42:35
#>    ShootLat ShootLong      id_haul_stomach         Species CPUEun n
#> 1   54.5167     13.80  1995.1.2.GFR.38G3.6 Limanda limanda      2 2
#> 2   54.5167     13.80  1995.1.2.GFR.38G3.6 Limanda limanda      4 2
#> 3   54.5167     13.80  1995.1.2.GFR.38G3.6 Limanda limanda      2 2
#> 4   54.5167     13.80  1995.1.2.GFR.38G3.6 Limanda limanda      2 2
#> 5   54.7333     13.40 1995.1.2.GFR.38G3.31 Limanda limanda      4 2
#> 6   54.7333     13.40 1995.1.2.GFR.38G3.31 Limanda limanda      6 2
#> 7   54.7333     13.40 1995.1.2.GFR.38G3.31 Limanda limanda      2 2
#> 8   54.7333     13.40 1995.1.2.GFR.38G3.31 Limanda limanda      2 2
#> 9   54.7333     13.40 1995.1.2.GFR.38G3.31 Limanda limanda      6 2
#> 10  54.7333     13.40 1995.1.2.GFR.38G3.31 Limanda limanda      4 2
#> 11  54.7000     13.55 1995.1.2.GFR.38G3.29 Limanda limanda      4 2
#> 12  54.7000     13.55 1995.1.2.GFR.38G3.29 Limanda limanda      2 2
#> 13  54.7000     13.55 1995.1.2.GFR.38G3.29 Limanda limanda      4 2
#> 14  54.7000     13.55 1995.1.2.GFR.38G3.29 Limanda limanda      4 2
#> 15  54.7000     13.55 1995.1.2.GFR.38G3.29 Limanda limanda      4 2
#> 16  54.7000     13.55 1995.1.2.GFR.38G3.29 Limanda limanda      4 2
#> 17  54.9167     13.10 1995.1.2.GFR.38G3.35 Limanda limanda      4 2
#> 18  54.9167     13.10 1995.1.2.GFR.38G3.35 Limanda limanda      2 2
#> 19  54.9167     13.10 1995.1.2.GFR.38G3.35 Limanda limanda      2 2
#> 20  54.9167     13.10 1995.1.2.GFR.38G3.35 Limanda limanda      6 2
# Keep the combinations that occur twice and pick one of their hauls, so the
# aggregation can be checked by eye on a known case
test <- hldab0 %>%
  as_tibble() %>%
  add_count(LngtClass, haul.id) %>%
  filter(n == 2)
test_id <- test[["haul.id"]][2]

# Collapse to one row per haul and length class: CPUEun becomes the sum over the
# combination, and only the first row of each combination is kept (so the other
# columns, e.g. Sex, are those of that first row)
hldabL <- hldab0 %>%
  group_by(LngtClass, haul.id) %>%
  mutate(CPUEun = sum(CPUEun)) %>%
  ungroup() %>%
  distinct(haul.id, LngtClass, .keep_all = TRUE) %>%
  dplyr::select(-X) # Clean up a bit

# Check with an ID: rows of the test haul before summing per length class
hldab0 %>% filter(haul.id == test_id)
#>       X RecordType Survey Quarter Country Ship Gear SweepLngt GearEx DoorType
#> 1 98411         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 2 98412         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 3 98413         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 4 98414         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 5 98415         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 6 98416         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#> 7  1926         HH   BITS       1     GFR 06S1  H20        NA   <NA>       NA
#>   StNo HaulNo Year SpecCodeType SpecCode   SpecVal  Sex TotalNo CatIdentifier
#> 1   27      6 1995            W   127139         1    M      10             1
#> 2   27      6 1995            W   127139         1    M      10             1
#> 3   27      6 1995            W   127139         1    M      10             1
#> 4   27      6 1995            W   127139         1    M      10             1
#> 5   27      6 1995            W   127139         1    F       4             1
#> 6   27      6 1995            W   127139         1    F       4             1
#> 7   27      6 1995         <NA>       NA zeroCatch <NA>      NA            NA
#>   NoMeas SubFactor SubWgt CatCatchWgt LngtCode LngtClass HLNoAtLngt DevStage
#> 1      7         1     NA           7        1        11          2     <NA>
#> 2      7         1     NA           7        1        15          2     <NA>
#> 3      7         1     NA           7        1        16          2     <NA>
#> 4      7         1     NA           7        1        17          4     <NA>
#> 5      7         1     NA           7        1        16          2     <NA>
#> 6      7         1     NA           7        1        17          2     <NA>
#> 7     NA        NA     NA          NA     <NA>        NA         NA     <NA>
#>   LenMeasType DateofCalculation Valid_Aphia Ship2 Ship3
#> 1          NA          20190207      127139   SOL   SOL
#> 2          NA          20190207      127139   SOL   SOL
#> 3          NA          20190207      127139   SOL   SOL
#> 4          NA          20190207      127139   SOL   SOL
#> 5          NA          20190207      127139   SOL   SOL
#> 6          NA          20190207      127139   SOL   SOL
#> 7          NA          20220223          NA   SOL   SOL
#>                        IDx sub_div Rect HaulVal StdSpecRecCode BySpecRecCode
#> 1 1995.1.GFR.06S1.H20.27.6      24 38G3       V              1             1
#> 2 1995.1.GFR.06S1.H20.27.6      24 38G3       V              1             1
#> 3 1995.1.GFR.06S1.H20.27.6      24 38G3       V              1             1
#> 4 1995.1.GFR.06S1.H20.27.6      24 38G3       V              1             1
#> 5 1995.1.GFR.06S1.H20.27.6      24 38G3       V              1             1
#> 6 1995.1.GFR.06S1.H20.27.6      24 38G3       V              1             1
#> 7 1995.1.GFR.06S1.H20.27.6      24 38G3       V              1             1
#>   Fishing.line Month DataType HaulDur GroundSpeed                 haul.id
#> 1           36     2        C      30         3.6 1995:1:GFR:SOL:H20:27:6
#> 2           36     2        C      30         3.6 1995:1:GFR:SOL:H20:27:6
#> 3           36     2        C      30         3.6 1995:1:GFR:SOL:H20:27:6
#> 4           36     2        C      30         3.6 1995:1:GFR:SOL:H20:27:6
#> 5           36     2        C      30         3.6 1995:1:GFR:SOL:H20:27:6
#> 6           36     2        C      30         3.6 1995:1:GFR:SOL:H20:27:6
#> 7           36     2        C      30         3.6 1995:1:GFR:SOL:H20:27:6
#>   ShootLat ShootLong     id_haul_stomach         Species CPUEun
#> 1  54.5167      13.8 1995.1.2.GFR.38G3.6 Limanda limanda      2
#> 2  54.5167      13.8 1995.1.2.GFR.38G3.6 Limanda limanda      2
#> 3  54.5167      13.8 1995.1.2.GFR.38G3.6 Limanda limanda      2
#> 4  54.5167      13.8 1995.1.2.GFR.38G3.6 Limanda limanda      4
#> 5  54.5167      13.8 1995.1.2.GFR.38G3.6 Limanda limanda      2
#> 6  54.5167      13.8 1995.1.2.GFR.38G3.6 Limanda limanda      2
#> 7  54.5167      13.8                <NA> Limanda limanda      0
# Rows of the test haul after summing per length class
hldabL %>%
  filter(haul.id == test_id) %>%
  as.data.frame()
#>   RecordType Survey Quarter Country Ship Gear SweepLngt GearEx DoorType StNo
#> 1         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   27
#> 2         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   27
#> 3         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   27
#> 4         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   27
#> 5         HH   BITS       1     GFR 06S1  H20        NA   <NA>       NA   27
#>   HaulNo Year SpecCodeType SpecCode   SpecVal  Sex TotalNo CatIdentifier NoMeas
#> 1      6 1995            W   127139         1    M      10             1      7
#> 2      6 1995            W   127139         1    M      10             1      7
#> 3      6 1995            W   127139         1    M      10             1      7
#> 4      6 1995            W   127139         1    M      10             1      7
#> 5      6 1995         <NA>       NA zeroCatch <NA>      NA            NA     NA
#>   SubFactor SubWgt CatCatchWgt LngtCode LngtClass HLNoAtLngt DevStage
#> 1         1     NA           7        1        11          2     <NA>
#> 2         1     NA           7        1        15          2     <NA>
#> 3         1     NA           7        1        16          2     <NA>
#> 4         1     NA           7        1        17          4     <NA>
#> 5        NA     NA          NA     <NA>        NA         NA     <NA>
#>   LenMeasType DateofCalculation Valid_Aphia Ship2 Ship3
#> 1          NA          20190207      127139   SOL   SOL
#> 2          NA          20190207      127139   SOL   SOL
#> 3          NA          20190207      127139   SOL   SOL
#> 4          NA          20190207      127139   SOL   SOL
#> 5          NA          20220223          NA   SOL   SOL
#>                        IDx sub_div Rect HaulVal StdSpecRecCode BySpecRecCode
#> 1 1995.1.GFR.06S1.H20.27.6      24 38G3       V              1             1
#> 2 1995.1.GFR.06S1.H20.27.6      24 38G3       V              1             1
#> 3 1995.1.GFR.06S1.H20.27.6      24 38G3       V              1             1
#> 4 1995.1.GFR.06S1.H20.27.6      24 38G3       V              1             1
#> 5 1995.1.GFR.06S1.H20.27.6      24 38G3       V              1             1
#>   Fishing.line Month DataType HaulDur GroundSpeed                 haul.id
#> 1           36     2        C      30         3.6 1995:1:GFR:SOL:H20:27:6
#> 2           36     2        C      30         3.6 1995:1:GFR:SOL:H20:27:6
#> 3           36     2        C      30         3.6 1995:1:GFR:SOL:H20:27:6
#> 4           36     2        C      30         3.6 1995:1:GFR:SOL:H20:27:6
#> 5           36     2        C      30         3.6 1995:1:GFR:SOL:H20:27:6
#>   ShootLat ShootLong     id_haul_stomach         Species CPUEun
#> 1  54.5167      13.8 1995.1.2.GFR.38G3.6 Limanda limanda      2
#> 2  54.5167      13.8 1995.1.2.GFR.38G3.6 Limanda limanda      2
#> 3  54.5167      13.8 1995.1.2.GFR.38G3.6 Limanda limanda      4
#> 4  54.5167      13.8 1995.1.2.GFR.38G3.6 Limanda limanda      6
#> 5  54.5167      13.8                <NA> Limanda limanda      0

# Do we still have 0 catches? List the hauls with a non-zero total; the number
# of rows can be compared with the same table before aggregation
hldabL %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(CPUEun), .groups = "drop") %>%
  filter(CPUEun_haul != 0)
#> # A tibble: 1,930 × 3
#>    haul.id                     Year CPUEun_haul
#>    <chr>                      <int>       <dbl>
#>  1 1993:1:DEN:DAN2:GRT:1:1     1993           2
#>  2 1993:1:DEN:DAN2:GRT:11:6    1993           5
#>  3 1993:1:DEN:DAN2:GRT:13:7    1993           1
#>  4 1993:1:DEN:DAN2:GRT:14:8    1993           1
#>  5 1993:1:DEN:DAN2:GRT:161:71  1993           1
#>  6 1993:1:DEN:DAN2:GRT:162:72  1993           1
#>  7 1993:1:DEN:DAN2:GRT:17:9    1993           1
#>  8 1993:1:DEN:DAN2:GRT:3:2     1993           4
#>  9 1993:1:DEN:DAN2:GRT:4:3     1993           3
#> 10 1993:1:DEN:DAN2:GRT:52:24   1993           1
#> # … with 1,920 more rows

# Stacked bars after aggregation: number of hauls per year, split by whether the
# haul total is zero ("Y") or not ("N")
hldabL %>%
  group_by(haul.id, Year) %>%
  summarise(CPUEun_haul = sum(CPUEun), .groups = "drop") %>%
  mutate(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N")) %>%
  count(Year, zero_catch) %>%
  ggplot(aes(x = Year, y = n, fill = zero_catch)) +
  geom_col()

Get and add annual weight-length relationships from the CA data so that I can calculate CPUE in biomass rather than numbers further down

# Cod
# Subset the CA (individual-level) records to cod, label the species and build
# a haul ID from the survey fields
bits_ca_cod <- bits_ca %>%
  filter(SpecCode %in% c("164712", "126436")) %>%
  mutate(
    StNo = as.numeric(StNo), # Non-numeric station numbers become NA
    Species = "Cod",
    ID = paste(Year, Quarter, Country, Ship, Gear, StNo, HaulNo, sep = ".")
  )
#> Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

# Now I need to copy rows with NoAtLngt > 1 so that 1 row = 1 ind
# First make a small test
# nrow(bits_ca_cod)
# test_id <- head(filter(bits_ca_cod, CANoAtLngt == 5))$ID[1]
# filter(bits_ca_cod, ID == test_id & CANoAtLngt == 5)

# uncount() repeats each row CANoAtLngt times; .remove = FALSE keeps the count
# column in the data
bits_ca_cod <- bits_ca_cod %>%
  uncount(CANoAtLngt, .remove = FALSE) %>%
  as_tibble()

# head(data.frame(filter(bits_ca_cod, ID == test_id & CANoAtLngt == 5)), 20)
# nrow(bits_ca_cod)
# Looks ok!

# Standardize length and drop NA weights (need that for condition)
bits_ca_cod <- bits_ca_cod %>%
  drop_na(IndWgt, LngtClass) %>%
  filter(IndWgt > 0 & LngtClass > 0) %>% # Filter positive length and weight
  mutate(
    weight_kg = IndWgt / 1000,
    # Standardize length (https://vocab.ices.dk/?ref=18): LngtClass is given in
    # mm for LngtCode "." (1 mm classes) and "0" (0.5 cm classes), in cm for the
    # other codes. A missing LngtCode still gives a missing length.
    length_cm = ifelse(LngtCode == "." | LngtCode == "0",
                       LngtClass / 10,
                       LngtClass)
  )

# Plot individual weight against length, one panel per year
ggplot(data = bits_ca_cod, mapping = aes(x = IndWgt, y = length_cm)) +
  facet_wrap(~Year) +
  geom_point()


# Now extract the coefficients for each year (not bothering with outliers at the moment)
# Fit log(weight) ~ log(length) once per year and tidy all fits into one table,
# so the intercept and the slope come from the same set of models
cod_lw_fits <- bits_ca_cod %>%
  split(.$Year) %>%
  purrr::map(function(d) lm(log(IndWgt) ~ log(length_cm), data = d)) %>%
  purrr::map_df(broom::tidy, .id = "Year") %>%
  mutate(Year = as.integer(Year))

# a = exp(intercept), i.e. the intercept back-transformed from the log scale
cod_intercept <- cod_lw_fits %>%
  filter(term == "(Intercept)") %>%
  mutate(a = exp(estimate)) %>%
  dplyr::select(Year, a)

# b = slope on the log-log scale
cod_slope <- cod_lw_fits %>%
  filter(term == "log(length_cm)") %>%
  rename("b" = "estimate") %>%
  dplyr::select(Year, b)

mean(cod_intercept$a)
#> [1] 0.008358197
mean(cod_slope$b)
#> [1] 3.034025

# Flounder
# Subset the CA (individual-level) records to flounder, label the species and
# build a haul ID from the survey fields
bits_ca_fle <- bits_ca %>%
  filter(SpecCode %in% c("127141", "172894")) %>%
  mutate(
    StNo = as.numeric(StNo), # Non-numeric station numbers become NA
    Species = "Flounder",
    ID = paste(Year, Quarter, Country, Ship, Gear, StNo, HaulNo, sep = ".")
  )
#> Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

# One row per individual: uncount() repeats each row CANoAtLngt times;
# .remove = FALSE keeps the count column in the data
bits_ca_fle <- bits_ca_fle %>%
  uncount(CANoAtLngt, .remove = FALSE) %>%
  as_tibble()

# Standardize length and drop NA weights (need that for condition)
bits_ca_fle <- bits_ca_fle %>%
  drop_na(IndWgt, LngtClass) %>%
  filter(IndWgt > 0 & LngtClass > 0) %>% # Filter positive length and weight
  mutate(
    weight_kg = IndWgt / 1000,
    # Standardize length (https://vocab.ices.dk/?ref=18): LngtClass is given in
    # mm for LngtCode "." (1 mm classes) and "0" (0.5 cm classes), in cm for the
    # other codes. A missing LngtCode still gives a missing length.
    length_cm = ifelse(LngtCode == "." | LngtCode == "0",
                       LngtClass / 10,
                       LngtClass)
  ) %>%
  # Drop the 2008 records with LngtCode ".", then implausibly long fish
  mutate(keep = ifelse(LngtCode == "." & Year == 2008, "N", "Y")) %>%
  filter(keep == "Y") %>%
  filter(length_cm < 70)

# Plot individual weight against length, coloured by length code, one panel
# per year
ggplot(
  data = bits_ca_fle,
  mapping = aes(x = IndWgt, y = length_cm, colour = LngtCode)
) +
  facet_wrap(~Year) +
  geom_point()


# Now extract the coefficients for each year (not bothering with outliers at the moment)
# Fit log(weight) ~ log(length) once per year and tidy all fits into one table,
# so the intercept and the slope come from the same set of models
fle_lw_fits <- bits_ca_fle %>%
  split(.$Year) %>%
  purrr::map(function(d) lm(log(IndWgt) ~ log(length_cm), data = d)) %>%
  purrr::map_df(broom::tidy, .id = "Year") %>%
  mutate(Year = as.integer(Year))

# a = exp(intercept), i.e. the intercept back-transformed from the log scale
fle_intercept <- fle_lw_fits %>%
  filter(term == "(Intercept)") %>%
  mutate(a = exp(estimate)) %>%
  dplyr::select(Year, a)

# b = slope on the log-log scale
fle_slope <- fle_lw_fits %>%
  filter(term == "log(length_cm)") %>%
  rename("b" = "estimate") %>%
  dplyr::select(Year, b)

mean(fle_intercept$a)
#> [1] 0.009891037
mean(fle_slope$b)
#> [1] 3.07509


# Plaice
# Subset the CA (individual-level) records to plaice, label the species and
# build a haul ID from the survey fields
bits_ca_pla <- bits_ca %>%
  filter(SpecCode %in% c("127143", "172902")) %>%
  mutate(
    StNo = as.numeric(StNo), # Non-numeric station numbers become NA
    Species = "Plaice",
    ID = paste(Year, Quarter, Country, Ship, Gear, StNo, HaulNo, sep = ".")
  )

# One row per individual: uncount() repeats each row CANoAtLngt times;
# .remove = FALSE keeps the count column in the data
bits_ca_pla <- bits_ca_pla %>%
  uncount(CANoAtLngt, .remove = FALSE) %>%
  as_tibble()

# Standardize length and drop NA weights (need that for condition)
bits_ca_pla <- bits_ca_pla %>%
  drop_na(IndWgt, LngtClass) %>%
  filter(IndWgt > 0 & LngtClass > 0) %>% # Filter positive length and weight
  mutate(
    weight_kg = IndWgt / 1000,
    # Standardize length (https://vocab.ices.dk/?ref=18): LngtClass is given in
    # mm for LngtCode "." (1 mm classes) and "0" (0.5 cm classes), in cm for the
    # other codes. A missing LngtCode still gives a missing length.
    length_cm = ifelse(LngtCode == "." | LngtCode == "0",
                       LngtClass / 10,
                       LngtClass)
  ) %>%
  # Drop the 2008 records with LngtCode ".", then implausibly long fish
  mutate(keep = ifelse(LngtCode == "." & Year == 2008, "N", "Y")) %>%
  filter(keep == "Y") %>%
  filter(length_cm < 70)

# Plot individual weight against length, coloured by length code, one panel
# per year
ggplot(
  data = bits_ca_pla,
  mapping = aes(x = IndWgt, y = length_cm, colour = LngtCode)
) +
  facet_wrap(~Year) +
  geom_point()


# Now extract the coefficients for each year (not bothering with outliers at the moment)
# Fit log(weight) ~ log(length) once per year and tidy all fits into one table,
# so the intercept and the slope come from the same set of models
pla_lw_fits <- bits_ca_pla %>%
  split(.$Year) %>%
  purrr::map(function(d) lm(log(IndWgt) ~ log(length_cm), data = d)) %>%
  purrr::map_df(broom::tidy, .id = "Year") %>%
  mutate(Year = as.integer(Year))

# a = exp(intercept), i.e. the intercept back-transformed from the log scale
pla_intercept <- pla_lw_fits %>%
  filter(term == "(Intercept)") %>%
  mutate(a = exp(estimate)) %>%
  dplyr::select(Year, a)

# b = slope on the log-log scale
pla_slope <- pla_lw_fits %>%
  filter(term == "log(length_cm)") %>%
  rename("b" = "estimate") %>%
  dplyr::select(Year, b)

mean(pla_intercept$a)
#> [1] 0.01782357
mean(pla_slope$b)
#> [1] 3.042893


# Dab
# Subset the CA (individual-level) records to dab, label the species and build
# a haul ID from the survey fields
bits_ca_dab <- bits_ca %>%
  filter(SpecCode %in% c("127139", "172881")) %>%
  mutate(
    StNo = as.numeric(StNo), # Non-numeric station numbers become NA
    Species = "Dab",
    ID = paste(Year, Quarter, Country, Ship, Gear, StNo, HaulNo, sep = ".")
  )

# One row per individual: uncount() repeats each row CANoAtLngt times;
# .remove = FALSE keeps the count column in the data
bits_ca_dab <- bits_ca_dab %>%
  uncount(CANoAtLngt, .remove = FALSE) %>%
  as_tibble()

# Standardize length and drop NA weights (need that for condition)
bits_ca_dab <- bits_ca_dab %>%
  drop_na(IndWgt, LngtClass) %>%
  filter(IndWgt > 0 & LngtClass > 0) %>% # Filter positive length and weight
  mutate(
    weight_kg = IndWgt / 1000,
    # Standardize length (https://vocab.ices.dk/?ref=18): LngtClass is given in
    # mm for LngtCode "." (1 mm classes) and "0" (0.5 cm classes), in cm for the
    # other codes. A missing LngtCode still gives a missing length.
    length_cm = ifelse(LngtCode == "." | LngtCode == "0",
                       LngtClass / 10,
                       LngtClass)
  ) %>%
  # Drop the 2008 records with LngtCode ".", then implausibly long fish
  mutate(keep = ifelse(LngtCode == "." & Year == 2008, "N", "Y")) %>%
  filter(keep == "Y") %>%
  filter(length_cm < 70)

# Plot individual weight against length, coloured by length code, one panel
# per year
ggplot(
  data = bits_ca_dab,
  mapping = aes(x = IndWgt, y = length_cm, colour = LngtCode)
) +
  facet_wrap(~Year) +
  geom_point()


# Keep only individuals with IndWgt below 1000
bits_ca_dab <- filter(bits_ca_dab, IndWgt < 1000)

# Now extract the coefficients for each year (not bothering with outliers at the moment)
# Fit log(weight) ~ log(length) once per year and tidy all fits into one table,
# so the intercept and the slope come from the same set of models
dab_lw_fits <- bits_ca_dab %>%
  split(.$Year) %>%
  purrr::map(function(d) lm(log(IndWgt) ~ log(length_cm), data = d)) %>%
  purrr::map_df(broom::tidy, .id = "Year") %>%
  mutate(Year = as.integer(Year))

# a = exp(intercept), i.e. the intercept back-transformed from the log scale
dab_intercept <- dab_lw_fits %>%
  filter(term == "(Intercept)") %>%
  mutate(a = exp(estimate)) %>%
  dplyr::select(Year, a)

# b = slope on the log-log scale
dab_slope <- dab_lw_fits %>%
  filter(term == "log(length_cm)") %>%
  rename("b" = "estimate") %>%
  dplyr::select(Year, b)

mean(dab_intercept$a)
#> [1] 0.008858112
mean(dab_slope$b)
#> [1] 3.079707

Join the annual length-weight (L-W) relationships to the respective catch data, so that CPUE can be calculated in biomass rather than in numbers

# These are the haul-data that get the coefficients:
# hlcodL, hlfleL, hlplaL, hldabL

# Cod: add the annual a and b by year
hlcodL <- hlcodL %>%
  left_join(cod_intercept, by = "Year") %>%
  left_join(cod_slope, by = "Year")

# Flounder: add the annual a and b by year
hlfleL <- hlfleL %>%
  left_join(fle_intercept, by = "Year") %>%
  left_join(fle_slope, by = "Year")

# Plaice: add the annual a and b by year
hlplaL <- hlplaL %>%
  left_join(pla_intercept, by = "Year") %>%
  left_join(pla_slope, by = "Year") %>%
  # Now replace NA a and b (don't have individual data for all years) with the
  # mean of the annual estimates. The mean is taken over the coefficient tables,
  # so each year counts once; a mean over the joined haul rows would weight
  # years by how many rows they happen to have.
  mutate(a = ifelse(is.na(a), mean(pla_intercept$a, na.rm = TRUE), a),
         b = ifelse(is.na(b), mean(pla_slope$b, na.rm = TRUE), b))

# Dab: add the annual a and b by year
hldabL <- hldabL %>%
  left_join(dab_intercept, by = "Year") %>%
  left_join(dab_slope, by = "Year") %>%
  # Now replace NA a and b (don't have individual data for all years) with the
  # mean of the annual estimates. The mean is taken over the coefficient tables,
  # so each year counts once; a mean over the joined haul rows would weight
  # years by how many rows they happen to have.
  mutate(a = ifelse(is.na(a), mean(dab_intercept$a, na.rm = TRUE), a),
         b = ifelse(is.na(b), mean(dab_slope$b, na.rm = TRUE), b))

Convert from CPUE in numbers to kg

# Cod
# First standardize length to cm and then check how zero-catches are implemented at this stage
# Standardize length (https://vocab.ices.dk/?ref=18): LngtClass is given in mm
# for LngtCode "." (1 mm classes) and "0" (0.5 cm classes), in cm for the other
# codes. A missing LngtCode still gives a missing length.
hlcodL <- hlcodL %>%
  mutate(length_cm = ifelse(LngtCode == "." | LngtCode == "0",
                            LngtClass / 10,
                            LngtClass))

# Are zero catches coded as length 0?
hlcodL %>% filter(length_cm == 0) # No such thing
#> # A tibble: 0 × 51
#> # … with 51 variables: RecordType <chr>, Survey <chr>, Quarter <int>,
#> #   Country <chr>, Ship <chr>, Gear <chr>, SweepLngt <int>, GearEx <chr>,
#> #   DoorType <lgl>, StNo <chr>, HaulNo <int>, Year <int>, SpecCodeType <chr>,
#> #   SpecCode <int>, SpecVal <fct>, Sex <chr>, TotalNo <dbl>,
#> #   CatIdentifier <int>, NoMeas <int>, SubFactor <dbl>, SubWgt <int>,
#> #   CatCatchWgt <int>, LngtCode <chr>, LngtClass <int>, HLNoAtLngt <dbl>,
#> #   DevStage <chr>, LenMeasType <int>, DateofCalculation <int>, …

# Now check if all rows where length is NA are the ones with zero catch!
# NA lengths are recoded to -9 so they can be plotted in a panel of their own
hlcodL %>%
  mutate(
    length2 = replace_na(length_cm, -9),
    no_length = ifelse(length2 < 0, "T", "F")
  ) %>%
  ggplot(aes(x = length2, y = CPUEun, colour = no_length)) +
  facet_wrap(~no_length) +
  geom_point(alpha = 0.2)
#> Warning: Removed 1 rows containing missing values (geom_point).


# Which lengths occur among the rows with zero catch?
hlcodL %>%
  filter(CPUEun == 0) %>%
  distinct(length_cm)
#> # A tibble: 1 × 1
#>   length_cm
#>       <dbl>
#> 1        NA

# Right, so all hauls with zero catch have NA length_cm. I don't have any NA catches
# `t` is a throwaway that is overwritten each time; presumably these calls are
# run for the row counts that tidylog reports
t <- drop_na(hlcodL, CPUEun)
t <- filter(hlcodL, CPUEun == 0)
t <- drop_na(hlcodL, length_cm)

# In other words, a zero catch is a row where the catch is zero and length_cm
# is NA. A NA length would give a NA biomass CPUE, but I want it to be 0 (as the
# numbers-CPUE is), so NA lengths are set to 0 in length_cm2 before the weight
# is calculated.
# weight_kg is a * length^b divided by 1000: a and b were fitted to IndWgt, which
# this script converts to kg by dividing by 1000
hlcodL <- hlcodL %>%
  mutate(
    length_cm2 = replace_na(length_cm, 0),
    weight_kg = (a * length_cm2^b) / 1000,
    CPUEun_kg = weight_kg * CPUEun
  )

# Plot predicted weight against length by year, to check it looks right in the
# haul data too
ggplot(data = hlcodL, mapping = aes(x = weight_kg, y = length_cm2)) +
  facet_wrap(~Year) +
  geom_point()


# Hmm, some unrealistic weights actually: show the 50 rows with the largest
# predicted weight
hlcodL %>%
  arrange(desc(weight_kg)) %>%
  as.data.frame() %>%
  head(50)
#>    RecordType Survey Quarter Country Ship Gear SweepLngt GearEx DoorType  StNo
#> 1          HL   BITS       1     DEN 26D4  GRT        NA      S       NA    83
#> 2          HL   BITS       1     DEN 26D4  GRT        NA      S       NA    83
#> 3          HL   BITS       1     DEN 26D4  GRT        NA      S       NA    83
#> 4          HL   BITS       1     DEN 26D4  GRT        NA      S       NA    83
#> 5          HL   BITS       1     DEN 26D4  GRT        NA      S       NA    83
#> 6          HL   BITS       1     DEN 26D4  GRT        NA      S       NA    83
#> 7          HL   BITS       1     DEN 26D4  GRT        NA      S       NA    83
#> 8          HL   BITS       1     DEN 26D4  GRT        NA      S       NA    83
#> 9          HL   BITS       1     DEN 26D4  GRT        NA      S       NA    83
#> 10         HL   BITS       1     DEN 26D4  GRT        NA      S       NA    83
#> 11         HL   BITS       1     DEN 26D4  GRT        NA      S       NA    83
#> 12         HL   BITS       1     DEN 26D4  GRT        NA   <NA>       NA    45
#> 13         HL   BITS       1     SWE 77AR  FOT       185   <NA>       NA    80
#> 14         HL   BITS       1     DEN 26D4  GRT        NA      S       NA    83
#> 15         HL   BITS       1     DEN 26D4  GRT        60   <NA>       NA    43
#> 16         HL   BITS       1     DEN 26D4  GRT        NA   <NA>       NA     9
#> 17         HL   BITS       1     SWE 77AR  FOT       225   <NA>       NA   191
#> 18         HL   BITS       1     DEN 26D4  TVL        75      S       NA    71
#> 19         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA    46
#> 20         HL   BITS       1     DEN 26D4  GRT        60   <NA>       NA   117
#> 21         HL   BITS       4     SWE 77AR  GOV        75   <NA>       NA   584
#> 22         HL   BITS       1     DEN 26D4  GRT        NA   <NA>       NA    75
#> 23         HL   BITS       4     DEN 26D4  TVL        75      S       NA    10
#> 24         HL   BITS       1     DEN 26D4  GRT        NA      S       NA   136
#> 25         HL   BITS       4     SWE 77AR  TVL        75   <NA>       NA   573
#> 26         HL   BITS       1     SWE 77AR  GOV       100   <NA>       NA   220
#> 27         HL   BITS       1     SWE 77AR  GOV       100   <NA>       NA   237
#> 28         HL   BITS       1     DEN 26D4  GRT        NA   <NA>       NA   111
#> 29         HL   BITS       4     SWE 77AR  FOT       225   <NA>       NA   231
#> 30         HL   BITS       4     SWE 77AR  FOT       203   <NA>       NA   247
#> 31         HL   BITS       1     SWE 77AR  FOT       225   <NA>       NA   207
#> 32         HL   BITS       4     SWE 77AR  GOV       100   <NA>       NA   652
#> 33         HL   BITS       1     DEN 26D4  GRT        NA   <NA>       NA    53
#> 34         HL   BITS       1     DEN 26D4  GRT        NA   <NA>       NA    57
#> 35         HL   BITS       1     RUS RUJB  TVL        NA   <NA>       NA  <NA>
#> 36         HL   BITS       1     GFR 06SL  TVS        NA   <NA>       NA 24316
#> 37         HL   BITS       1     DEN 26D4  GRT       110   <NA>       NA   162
#> 38         HL   BITS       1     DEN 26D4  GRT        60   <NA>       NA    74
#> 39         HL   BITS       1     SWE 77AR  FOT       225   <NA>       NA   222
#> 40         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA    44
#> 41         HL   BITS       1     DEN 26D4  TVL        NA      S     TRUE    95
#> 42         HL   BITS       1     DEN 26D4  TVL        75      S       NA     6
#> 43         HL   BITS       1     SWE 77AR  FOT       185   <NA>       NA    58
#> 44         HL   BITS       4     POL 67BC  TVL        75      S       NA 26132
#> 45         HL   BITS       1     RUS RUJB  HAK        NA   <NA>       NA  <NA>
#> 46         HL   BITS       1     RUS RUJB  HAK        NA   <NA>       NA  <NA>
#> 47         HL   BITS       1     DEN 26D4  TVL        NA      S     TRUE    19
#> 48         HL   BITS       1     DEN 26D4  GRT        60   <NA>       NA    17
#> 49         HL   BITS       4     SWE 77AR  FOT       180   <NA>       NA   252
#> 50         HL   BITS       4     DEN 26D4  TVL        75      S       NA    94
#>    HaulNo Year SpecCodeType SpecCode SpecVal  Sex TotalNo CatIdentifier NoMeas
#> 1      40 1998            T   164712       1 <NA>      17             1     17
#> 2      40 1998            T   164712       1 <NA>      17             1     17
#> 3      40 1998            T   164712       1 <NA>      17             1     17
#> 4      40 1998            T   164712       1 <NA>      17             1     17
#> 5      40 1998            T   164712       1 <NA>      17             1     17
#> 6      40 1998            T   164712       1 <NA>      17             1     17
#> 7      40 1998            T   164712       1 <NA>      17             1     17
#> 8      40 1998            T   164712       1 <NA>      17             1     17
#> 9      40 1998            T   164712       1 <NA>      17             1     17
#> 10     40 1998            T   164712       1 <NA>      17             1     17
#> 11     40 1998            T   164712       1 <NA>      17             1     17
#> 12     25 1996            W   126436       1 <NA>      93             1     93
#> 13     32 1993            W   126436       1 <NA>     454             1    454
#> 14     40 1998            T   164712       1 <NA>      17             1     17
#> 15     22 1994            T   164712       1 <NA>      37             1     37
#> 16      5 1996            W   126436       1 <NA>     406             1    406
#> 17      3 1998            T   164712       1 <NA>     546             1    273
#> 18     36 2003            T   164712       1 <NA>     255             1    255
#> 19     20 1993            W   126436       1 <NA>     326             1    163
#> 20     50 1994            T   164712       1 <NA>     775             1    775
#> 21     18 2000            W   126436       1 <NA>     454             1    227
#> 22     40 1996            W   126436       1 <NA>      29             1     29
#> 23      4 2002            T   164712       1 <NA>    1958             1   1958
#> 24     64 1998            T   164712       1 <NA>     130             1    130
#> 25     11 2002            W   126436       1 <NA>    3090             1   1545
#> 26     21 2000            W   126436       1 <NA>    1076             1    538
#> 27     34 2000            W   126436       1 <NA>     638             1    319
#> 28     47 1996            W   126436       1 <NA>      78             1     78
#> 29     26 1996            W   126436       1 <NA>      54             1     27
#> 30      2 1995            W   126436       1 <NA>    2736             1   1368
#> 31     15 1998            T   164712       1 <NA>      20             1     10
#> 32     17 1999            T   164712       1 <NA>    1484             1    742
#> 33     30 1996            W   126436       1 <NA>     197             1    197
#> 34     23 1995            W   126436       1 <NA>     223             1    223
#> 35     13 2006            W   126436       1 <NA>      24             1     12
#> 36     41 2008            T   164712       1 <NA>     864             1    864
#> 37     72 1993            W   126436       1 <NA>     128             1    128
#> 38     39 1994            T   164712       1 <NA>     457             1    457
#> 39     26 1998            T   164712       1 <NA>     126             1     63
#> 40     24 1994            T   164712       1 <NA>     260             1    130
#> 41     66 2005            W   126436       1 <NA>     764             1    764
#> 42      3 2001            T   164712       1 <NA>     132             1    132
#> 43     10 1993            W   126436       1 <NA>      85             1     85
#> 44     24 2008            T   164712       1 <NA>    1696             1    848
#> 45     35 1998            T   164712       1 <NA>    1214             1    390
#> 46     33 1998            T   164712       1 <NA>     302             1    151
#> 47     17 2009            W   126436       1 <NA>     972             1    972
#> 48      9 1993            W   126436       1 <NA>     281             1    277
#> 49     21 1993            W   126436       1 <NA>    5932             1   1499
#> 50     46 2000            T   164712       1 <NA>     180             1    180
#>    SubFactor SubWgt CatCatchWgt LngtCode LngtClass HLNoAtLngt DevStage
#> 1          1     NA       13275        0       335          1     <NA>
#> 2          1     NA       13275        0       285          1     <NA>
#> 3          1     NA       13275        0       225          2     <NA>
#> 4          1     NA       13275        0       220          1     <NA>
#> 5          1     NA       13275        0       215          1     <NA>
#> 6          1     NA       13275        0       180          1     <NA>
#> 7          1     NA       13275        0       175          1     <NA>
#> 8          1     NA       13275        0       160          1     <NA>
#> 9          1     NA       13275        0       150          2     <NA>
#> 10         1     NA       13275        0       145          3     <NA>
#> 11         1     NA       13275        0       140          1     <NA>
#> 12         1 113900      113900        .      1360          1     <NA>
#> 13         1     NA      234500        1       127          1     <NA>
#> 14         1     NA       13275        0       130          2     <NA>
#> 15         1     NA        1033        1       127          1     <NA>
#> 16         1 257280      257280        .      1230          1     <NA>
#> 17         1     NA      484399        1       121          2     <NA>
#> 18         1     NA      158200        1       121          1     <NA>
#> 19         1     NA        2190        1       116          2     <NA>
#> 20         1     NA        5718        1       118          1     <NA>
#> 21         1     NA      268580        1       118          2     <NA>
#> 22         1  48300       48300        .      1170          1     <NA>
#> 23         1     NA     1040200        1       118          1     <NA>
#> 24         1     NA      157570        1       116          1     <NA>
#> 25         1     NA     1302400        1       117          2     <NA>
#> 26         1     NA      303000        1       116          2     <NA>
#> 27         1     NA      158200        1       115          2     <NA>
#> 28         1 119000      119000        .      1130          1     <NA>
#> 29         1     NA       96200        1       113          2     <NA>
#> 30         1     NA      607800        1       111          2     <NA>
#> 31         1     NA       76000        1       113          2     <NA>
#> 32         1     NA      267459        1       113          2     <NA>
#> 33         1 192900      192900        .      1120          1     <NA>
#> 34         1 149100      149100        .      1100          1     <NA>
#> 35         1     NA       39544        1       118          2     <NA>
#> 36         1     NA      247830        1       119          1     <NA>
#> 37         1     NA        1079        1       109          1     <NA>
#> 38         1     NA          42        1       112          1     <NA>
#> 39         1     NA       48400        1       111          2     <NA>
#> 40         1     NA        2374        1       111          2     <NA>
#> 41         1 139400      139400        .      1150          1     <NA>
#> 42         1     NA       61000        1       111          1     <NA>
#> 43         1     NA       46000        1       107          1     <NA>
#> 44         1     NA      968860        1       116          2     <NA>
#> 45         1     NA       16278        1       109          3     <NA>
#> 46         1     NA        2962        1       109          2     <NA>
#> 47         1 322476      322476        .      1130          1     <NA>
#> 48         1     NA        1297        1       106          1     <NA>
#> 49         1     NA     5688600        1       106          3     <NA>
#> 50         1     NA       46605        1       109          1     <NA>
#>    LenMeasType DateofCalculation Valid_Aphia Ship2 Ship3
#> 1           NA          20140617      126436  DAN2  DAN2
#> 2           NA          20140617      126436  DAN2  DAN2
#> 3           NA          20140617      126436  DAN2  DAN2
#> 4           NA          20140617      126436  DAN2  DAN2
#> 5           NA          20140617      126436  DAN2  DAN2
#> 6           NA          20140617      126436  DAN2  DAN2
#> 7           NA          20140617      126436  DAN2  DAN2
#> 8           NA          20140617      126436  DAN2  DAN2
#> 9           NA          20140617      126436  DAN2  DAN2
#> 10          NA          20140617      126436  DAN2  DAN2
#> 11          NA          20140617      126436  DAN2  DAN2
#> 12          NA          20190208      126436  DAN2  DAN2
#> 13          NA          20211203      126436   ARG   ARG
#> 14          NA          20140617      126436  DAN2  DAN2
#> 15          NA          20161213      126436  DAN2  DAN2
#> 16          NA          20190208      126436  DAN2  DAN2
#> 17          NA          20140617      126436   ARG   ARG
#> 18          NA          20131108      126436  DAN2  DAN2
#> 19          NA          20211203      126436   SOL   SOL
#> 20          NA          20161213      126436  DAN2  DAN2
#> 21          NA          20131112      126436   ARG   ARG
#> 22          NA          20190208      126436  DAN2  DAN2
#> 23          NA          20131113      126436  DAN2  DAN2
#> 24          NA          20140617      126436  DAN2  DAN2
#> 25          NA          20131113      126436   ARG   ARG
#> 26          NA          20190228      126436   ARG   ARG
#> 27          NA          20190228      126436   ARG   ARG
#> 28          NA          20190208      126436  DAN2  DAN2
#> 29          NA          20161115      126436   ARG   ARG
#> 30          NA          20161115      126436   ARG   ARG
#> 31          NA          20140617      126436   ARG   ARG
#> 32          NA          20131112      126436   ARG   ARG
#> 33          NA          20190208      126436  DAN2  DAN2
#> 34          NA          20190207      126436  DAN2  DAN2
#> 35          NA          20200115      126436  ATLD  ATLD
#> 36          NA          20180423      126436  SOL2  SOL2
#> 37          NA          20211203      126436  DAN2  DAN2
#> 38          NA          20161213      126436  DAN2  DAN2
#> 39          NA          20140617      126436   ARG   ARG
#> 40          NA          20161213      126436   SOL   SOL
#> 41          NA          20131204      126436  DAN2  DAN2
#> 42          NA          20160714      126436  DAN2  DAN2
#> 43          NA          20211203      126436   ARG   ARG
#> 44          NA          20180507      126436   BAL   BAL
#> 45          NA          20140617      126436  ATLD  ATLD
#> 46          NA          20140617      126436  ATLD  ATLD
#> 47          NA          20131216      126436  DAN2  DAN2
#> 48          NA          20211203      126436  DAN2  DAN2
#> 49          NA          20131111      126436   ARG   ARG
#> 50          NA          20131112      126436  DAN2  DAN2
#>                             IDx sub_div Rect HaulVal StdSpecRecCode
#> 1     1998.1.DEN.26D4.GRT.83.40      25 38G5       V              1
#> 2     1998.1.DEN.26D4.GRT.83.40      25 38G5       V              1
#> 3     1998.1.DEN.26D4.GRT.83.40      25 38G5       V              1
#> 4     1998.1.DEN.26D4.GRT.83.40      25 38G5       V              1
#> 5     1998.1.DEN.26D4.GRT.83.40      25 38G5       V              1
#> 6     1998.1.DEN.26D4.GRT.83.40      25 38G5       V              1
#> 7     1998.1.DEN.26D4.GRT.83.40      25 38G5       V              1
#> 8     1998.1.DEN.26D4.GRT.83.40      25 38G5       V              1
#> 9     1998.1.DEN.26D4.GRT.83.40      25 38G5       V              1
#> 10    1998.1.DEN.26D4.GRT.83.40      25 38G5       V              1
#> 11    1998.1.DEN.26D4.GRT.83.40      25 38G5       V              1
#> 12    1996.1.DEN.26D4.GRT.45.25      26 41H0       V              1
#> 13    1993.1.SWE.77AR.FOT.80.32      25 39G5       V              1
#> 14    1998.1.DEN.26D4.GRT.83.40      25 38G5       V              1
#> 15    1994.1.DEN.26D4.GRT.43.22      26 41H0       V              1
#> 16      1996.1.DEN.26D4.GRT.9.5      25 38G5       V              1
#> 17    1998.1.SWE.77AR.FOT.191.3      25 40G5       V              1
#> 18    2003.1.DEN.26D4.TVL.71.36      25 39G5       V              1
#> 19    1993.1.GFR.06S1.H20.46.20      24 39G4       V              1
#> 20   1994.1.DEN.26D4.GRT.117.50      25 38G5       V              1
#> 21   2000.4.SWE.77AR.GOV.584.18      25 40G5       V              1
#> 22    1996.1.DEN.26D4.GRT.75.40      25 39G6       V              1
#> 23     2002.4.DEN.26D4.TVL.10.4      24 39G4       V              1
#> 24   1998.1.DEN.26D4.GRT.136.64      26 39G8       V              1
#> 25   2002.4.SWE.77AR.TVL.573.11      25 41G7       V              1
#> 26   2000.1.SWE.77AR.GOV.220.21      24 39G3       V              1
#> 27   2000.1.SWE.77AR.GOV.237.34      25 40G5       V              1
#> 28   1996.1.DEN.26D4.GRT.111.47      25 39G5       V              1
#> 29   1996.4.SWE.77AR.FOT.231.26      28 43G8       V              1
#> 30    1995.4.SWE.77AR.FOT.247.2      25 40G5       V              1
#> 31   1998.1.SWE.77AR.FOT.207.15      25 40G7       V              1
#> 32   1999.4.SWE.77AR.GOV.652.17      25 40G5       V              1
#> 33    1996.1.DEN.26D4.GRT.53.30      26 41G9       V              1
#> 34    1995.1.DEN.26D4.GRT.57.23      26 41G9       V              1
#> 35    2006.1.RUS.RUJB.TVL.NA.13      26 39H0       V              1
#> 36 2008.1.GFR.06SL.TVS.24316.41      24 39G4       V              1
#> 37   1993.1.DEN.26D4.GRT.162.72      25 38G5       V              1
#> 38    1994.1.DEN.26D4.GRT.74.39      26 41G8       V              1
#> 39   1998.1.SWE.77AR.FOT.222.26      28 43H0       V              1
#> 40    1994.1.GFR.06S1.H20.44.24      24 39G3       V              1
#> 41    2005.1.DEN.26D4.TVL.95.66      25 39G5       V              1
#> 42      2001.1.DEN.26D4.TVL.6.3      24 39G3       V              1
#> 43    1993.1.SWE.77AR.FOT.58.10      25 41G7       V              1
#> 44 2008.4.POL.67BC.TVL.26132.24      26 38G8       V              3
#> 45    1998.1.RUS.RUJB.HAK.NA.35      26 40G9       V              1
#> 46    1998.1.RUS.RUJB.HAK.NA.33      26 40G9       V              1
#> 47    2009.1.DEN.26D4.TVL.19.17      24 39G4       V              1
#> 48     1993.1.DEN.26D4.GRT.17.9      24 39G4       V              1
#> 49   1993.4.SWE.77AR.FOT.252.21      25 41G7       V              1
#> 50    2000.4.DEN.26D4.TVL.94.46      25 39G6       V              1
#>    BySpecRecCode Fishing.line Month DataType HaulDur GroundSpeed
#> 1              1        -9.00     3        R      60         3.0
#> 2              1        -9.00     3        R      60         3.0
#> 3              1        -9.00     3        R      60         3.0
#> 4              1        -9.00     3        R      60         3.0
#> 5              1        -9.00     3        R      60         3.0
#> 6              1        -9.00     3        R      60         3.0
#> 7              1        -9.00     3        R      60         3.0
#> 8              1        -9.00     3        R      60         3.0
#> 9              1        -9.00     3        R      60         3.0
#> 10             1        -9.00     3        R      60         3.0
#> 11             1        -9.00     3        R      60         3.0
#> 12             1        -9.00     2        R      60         3.4
#> 13             1        83.00     3        C      60         3.0
#> 14             1        -9.00     3        R      60         3.0
#> 15             1        -9.00     3        C      60        -9.0
#> 16             1        -9.00     2        R      60         3.6
#> 17             1        83.00     2        C      30         3.3
#> 18             1        63.46     3        R      30         3.0
#> 19             1        36.00     2        C      30         4.0
#> 20             1        -9.00     3        C      60        -9.0
#> 21             1       160.00    11        C      30         3.7
#> 22             1        -9.00     3        R      60         3.2
#> 23             1        63.46    11        R      30         3.1
#> 24             1        -9.00     3        R      60         3.3
#> 25             1        63.46    11        C      30         3.0
#> 26             1       160.00     3        C      30         3.5
#> 27             1       160.00     3        C      30         3.5
#> 28             1        -9.00     3        R      60         3.5
#> 29             1        83.00    12        C      30         3.3
#> 30             1        83.00    12        C      30         3.7
#> 31             1        83.00     2        C      30         3.3
#> 32             1       160.00    11        C      30         3.3
#> 33             1        -9.00     3        R      60         3.1
#> 34             1        -9.00     2        R      60        -9.0
#> 35             1        63.46     3        C      30         3.0
#> 36             1        33.22     3        R      30         3.2
#> 37             1        -9.00     3        C      60        -9.0
#> 38             1        -9.00     3        C      60        -9.0
#> 39             1        83.00     3        C      30         3.4
#> 40             1        36.00     2        C      30         3.8
#> 41             1        63.46     3        R      30         3.0
#> 42             1        63.46     2        R      30         3.0
#> 43             1        83.00     3        C      60         3.5
#> 44             0        63.46    11        C      30         3.0
#> 45             1        -9.00     3        C      30         3.6
#> 46             1        -9.00     3        C      30         3.6
#> 47             1        63.46     3        R      30         2.9
#> 48             1        -9.00     3        C      59        -9.0
#> 49             1        83.00    11        C      30         3.0
#> 50             1        63.46    11        R      31         2.9
#>                         haul.id ShootLat ShootLong       id_haul_stomach
#> 1     1998:1:DEN:DAN2:GRT:83:40  54.8150   15.3217  1998.1.3.DEN.38G5.40
#> 2     1998:1:DEN:DAN2:GRT:83:40  54.8150   15.3217  1998.1.3.DEN.38G5.40
#> 3     1998:1:DEN:DAN2:GRT:83:40  54.8150   15.3217  1998.1.3.DEN.38G5.40
#> 4     1998:1:DEN:DAN2:GRT:83:40  54.8150   15.3217  1998.1.3.DEN.38G5.40
#> 5     1998:1:DEN:DAN2:GRT:83:40  54.8150   15.3217  1998.1.3.DEN.38G5.40
#> 6     1998:1:DEN:DAN2:GRT:83:40  54.8150   15.3217  1998.1.3.DEN.38G5.40
#> 7     1998:1:DEN:DAN2:GRT:83:40  54.8150   15.3217  1998.1.3.DEN.38G5.40
#> 8     1998:1:DEN:DAN2:GRT:83:40  54.8150   15.3217  1998.1.3.DEN.38G5.40
#> 9     1998:1:DEN:DAN2:GRT:83:40  54.8150   15.3217  1998.1.3.DEN.38G5.40
#> 10    1998:1:DEN:DAN2:GRT:83:40  54.8150   15.3217  1998.1.3.DEN.38G5.40
#> 11    1998:1:DEN:DAN2:GRT:83:40  54.8150   15.3217  1998.1.3.DEN.38G5.40
#> 12    1996:1:DEN:DAN2:GRT:45:25  56.3700   20.0666  1996.1.2.DEN.41H0.25
#> 13     1993:1:SWE:ARG:FOT:80:32  55.4120   15.3205  1993.1.3.SWE.39G5.32
#> 14    1998:1:DEN:DAN2:GRT:83:40  54.8150   15.3217  1998.1.3.DEN.38G5.40
#> 15    1994:1:DEN:DAN2:GRT:43:22  56.4500   20.2000  1994.1.3.DEN.41H0.22
#> 16      1996:1:DEN:DAN2:GRT:9:5  54.5750   15.3200   1996.1.2.DEN.38G5.5
#> 17     1998:1:SWE:ARG:FOT:191:3  55.8218   15.4220   1998.1.2.SWE.40G5.3
#> 18    2003:1:DEN:DAN2:TVL:71:36  55.3709   15.5140  2003.1.3.DEN.39G5.36
#> 19     1993:1:GFR:SOL:H20:46:20  55.1333   14.1667  1993.1.2.GFR.39G4.20
#> 20   1994:1:DEN:DAN2:GRT:117:50  54.7833   15.9500  1994.1.3.DEN.38G5.50
#> 21    2000:4:SWE:ARG:GOV:584:18  55.8150   15.3917 2000.4.11.SWE.40G5.18
#> 22    1996:1:DEN:DAN2:GRT:75:40  55.1983   16.4916  1996.1.3.DEN.39G6.40
#> 23     2002:4:DEN:DAN2:TVL:10:4  55.4908   14.6121  2002.4.11.DEN.39G4.4
#> 24   1998:1:DEN:DAN2:GRT:136:64  55.4800   18.4683  1998.1.3.DEN.39G8.64
#> 25    2002:4:SWE:ARG:TVL:573:11  56.1460   17.7615 2002.4.11.SWE.41G7.11
#> 26    2000:1:SWE:ARG:GOV:220:21  55.0100   13.9300  2000.1.3.SWE.39G3.21
#> 27    2000:1:SWE:ARG:GOV:237:34  55.8467   15.5717  2000.1.3.SWE.40G5.34
#> 28   1996:1:DEN:DAN2:GRT:111:47  55.2150   15.6083  1996.1.3.DEN.39G5.47
#> 29    1996:4:SWE:ARG:FOT:231:26  57.0833   18.9000 1996.4.12.SWE.43G8.26
#> 30     1995:4:SWE:ARG:FOT:247:2  55.8323   15.5427  1995.4.12.SWE.40G5.2
#> 31    1998:1:SWE:ARG:FOT:207:15  55.9271   17.1108  1998.1.2.SWE.40G7.15
#> 32    1999:4:SWE:ARG:GOV:652:17  55.8400   15.5600 1999.4.11.SWE.40G5.17
#> 33    1996:1:DEN:DAN2:GRT:53:30  56.2566   19.5666  1996.1.3.DEN.41G9.30
#> 34    1995:1:DEN:DAN2:GRT:57:23  56.3666   19.8666  1995.1.2.DEN.41G9.23
#> 35    2006:1:RUS:ATLD:TVL:NA:13  55.1583   20.0267  2006.1.3.RUS.39H0.13
#> 36 2008:1:GFR:SOL2:TVS:24316:41  55.1505   14.0718  2008.1.3.GFR.39G4.41
#> 37   1993:1:DEN:DAN2:GRT:162:72  54.8500   15.6667  1993.1.3.DEN.38G5.72
#> 38    1994:1:DEN:DAN2:GRT:74:39  56.1167   18.2667  1994.1.3.DEN.41G8.39
#> 39    1998:1:SWE:ARG:FOT:222:26  57.2520   20.7436  1998.1.3.SWE.43H0.26
#> 40     1994:1:GFR:SOL:H20:44:24  55.0333   13.4333  1994.1.2.GFR.39G3.24
#> 41    2005:1:DEN:DAN2:TVL:95:66  55.0966   15.2680  2005.1.3.DEN.39G5.66
#> 42      2001:1:DEN:DAN2:TVL:6:3  55.0214   13.7642   2001.1.2.DEN.39G3.3
#> 43     1993:1:SWE:ARG:FOT:58:10  56.0352   17.7188  1993.1.3.SWE.41G7.10
#> 44  2008:4:POL:BAL:TVL:26132:24  54.5400   18.8867 2008.4.11.POL.38G8.24
#> 45    1998:1:RUS:ATLD:HAK:NA:35  55.5167   19.8833  1998.1.3.RUS.40G9.35
#> 46    1998:1:RUS:ATLD:HAK:NA:33  55.6167   19.6500  1998.1.3.RUS.40G9.33
#> 47    2009:1:DEN:DAN2:TVL:19:17  55.3195   14.9859  2009.1.3.DEN.39G4.17
#> 48     1993:1:DEN:DAN2:GRT:17:9  55.0333   14.2167   1993.1.3.DEN.39G4.9
#> 49    1993:4:SWE:ARG:FOT:252:21  56.1133   17.5917 1993.4.11.SWE.41G7.21
#> 50    2000:4:DEN:DAN2:TVL:94:46  55.1058   16.3850 2000.4.11.DEN.39G6.46
#>         Species   CPUEun           a        b length_cm length_cm2 weight_kg
#> 1  Gadus morhua 1.000000 0.007345618 3.082202       335        335 445.37563
#> 2  Gadus morhua 1.000000 0.007345618 3.082202       285        285 270.61760
#> 3  Gadus morhua 2.000000 0.007345618 3.082202       225        225 130.59602
#> 4  Gadus morhua 1.000000 0.007345618 3.082202       220        220 121.85635
#> 5  Gadus morhua 1.000000 0.007345618 3.082202       215        215 113.52063
#> 6  Gadus morhua 1.000000 0.007345618 3.082202       180        180  65.64985
#> 7  Gadus morhua 1.000000 0.007345618 3.082202       175        175  60.19004
#> 8  Gadus morhua 1.000000 0.007345618 3.082202       160        160  45.66372
#> 9  Gadus morhua 2.000000 0.007345618 3.082202       150        150  37.42667
#> 10 Gadus morhua 3.000000 0.007345618 3.082202       145        145  33.71329
#> 11 Gadus morhua 1.000000 0.007345618 3.082202       140        140  30.25718
#> 12 Gadus morhua 1.000000 0.008380383 3.055535       136        136  27.69283
#> 13 Gadus morhua 1.000000 0.007244481 3.100800       127        127  24.18146
#> 14 Gadus morhua 2.000000 0.007345618 3.082202       130        130  24.07846
#> 15 Gadus morhua 1.000000 0.008339286 3.052078       127        127  21.98379
#> 16 Gadus morhua 1.000000 0.008380383 3.055535       123        123  20.37242
#> 17 Gadus morhua 2.000000 0.007345618 3.082202       121        121  19.30161
#> 18 Gadus morhua 2.000000 0.007444717 3.070310       121        121  18.47755
#> 19 Gadus morhua 2.000000 0.007244481 3.100800       116        116  18.25909
#> 20 Gadus morhua 1.000000 0.008339286 3.052078       118        118  17.56609
#> 21 Gadus morhua 2.000000 0.007554303 3.071838       118        118  17.48561
#> 22 Gadus morhua 1.000000 0.008380383 3.055535       117        117  17.48553
#> 23 Gadus morhua 2.000000 0.007243730 3.076256       118        118  17.12383
#> 24 Gadus morhua 1.000000 0.007345618 3.082202       116        116  16.94747
#> 25 Gadus morhua 2.000000 0.007243730 3.076256       117        117  16.68132
#> 26 Gadus morhua 2.000000 0.007554303 3.071838       116        116  16.59110
#> 27 Gadus morhua 2.000000 0.007554303 3.071838       115        115  16.15566
#> 28 Gadus morhua 1.000000 0.008380383 3.055535       113        113  15.72235
#> 29 Gadus morhua 2.000000 0.008380383 3.055535       113        113  15.72235
#> 30 Gadus morhua 2.000000 0.007216770 3.098570       111        111  15.70067
#> 31 Gadus morhua 2.000000 0.007345618 3.082202       113        113  15.63258
#> 32 Gadus morhua 2.000000 0.006831731 3.097311       113        113  15.61542
#> 33 Gadus morhua 1.000000 0.008380383 3.055535       112        112  15.30107
#> 34 Gadus morhua 1.000000 0.007216770 3.098570       110        110  15.26652
#> 35 Gadus morhua 2.000000 0.009208543 3.001584       118        118  15.24467
#> 36 Gadus morhua 2.000000 0.009197813 2.994628       119        119  15.10690
#> 37 Gadus morhua 1.000000 0.007244481 3.100800       109        109  15.05428
#> 38 Gadus morhua 1.000000 0.008339286 3.052078       112        112  14.97969
#> 39 Gadus morhua 2.000000 0.007345618 3.082202       111        111  14.79541
#> 40 Gadus morhua 2.000000 0.008339286 3.052078       111        111  14.57521
#> 41 Gadus morhua 2.000000 0.008377248 3.027632       115        115  14.52564
#> 42 Gadus morhua 2.000000 0.007693063 3.064660       111        111  14.26658
#> 43 Gadus morhua 1.000000 0.007244481 3.100800       107        107  14.21416
#> 44 Gadus morhua 2.000000 0.009197813 2.994628       116        116  13.99484
#> 45 Gadus morhua 3.000000 0.007345618 3.082202       109        109  13.98905
#> 46 Gadus morhua 2.000000 0.007345618 3.082202       109        109  13.98905
#> 47 Gadus morhua 2.000000 0.007746254 3.044723       113        113  13.80850
#> 48 Gadus morhua 1.000000 0.007244481 3.100800       106        106  13.80627
#> 49 Gadus morhua 3.000000 0.007244481 3.100800       106        106  13.80627
#> 50 Gadus morhua 1.935484 0.007554303 3.071838       109        109  13.70373
#>    CPUEun_kg
#> 1  445.37563
#> 2  270.61760
#> 3  261.19205
#> 4  121.85635
#> 5  113.52063
#> 6   65.64985
#> 7   60.19004
#> 8   45.66372
#> 9   74.85333
#> 10 101.13986
#> 11  30.25718
#> 12  27.69283
#> 13  24.18146
#> 14  48.15692
#> 15  21.98379
#> 16  20.37242
#> 17  38.60322
#> 18  36.95509
#> 19  36.51819
#> 20  17.56609
#> 21  34.97121
#> 22  17.48553
#> 23  34.24765
#> 24  16.94747
#> 25  33.36265
#> 26  33.18221
#> 27  32.31132
#> 28  15.72235
#> 29  31.44471
#> 30  31.40135
#> 31  31.26517
#> 32  31.23085
#> 33  15.30107
#> 34  15.26652
#> 35  30.48935
#> 36  30.21379
#> 37  15.05428
#> 38  14.97969
#> 39  29.59081
#> 40  29.15042
#> 41  29.05128
#> 42  28.53316
#> 43  14.21416
#> 44  27.98968
#> 45  41.96716
#> 46  27.97811
#> 47  27.61701
#> 48  13.80627
#> 49  41.41880
#> 50  26.52336
# Drop the unrealistic records listed above: keep only rows with a predicted
# weight below 100 and a length below 135
# NOTE(review): in the listing above, rows with LngtCode "0" have length_cm
# equal to LngtClass (e.g. 335), whereas the flounder and plaice sections divide
# LngtClass by 10 for that code - confirm the cod length standardization
hlcodL <- hlcodL %>%
  filter(
    weight_kg < 100,
    length_cm2 < 135
  )

# Same weight-length check after removing the unrealistic records
hlcodL %>%
  ggplot(aes(x = weight_kg, y = length_cm2)) +
  geom_point() +
  facet_wrap(~Year)



# Flounder
# Put all lengths on a cm scale: LngtClass is divided by 10 for LngtCode "."
# and "0" and used as is otherwise (https://vocab.ices.dk/?ref=18). After that,
# check how zero catches are represented at this stage.
hlfleL <- mutate(
  hlfleL,
  length_cm = ifelse(LngtCode %in% c(".", "0"), LngtClass / 10, LngtClass)
)

# Look for rows with a length of exactly 0 (there are none)
hlfleL %>% filter(length_cm == 0)
#> # A tibble: 0 × 51
#> # … with 51 variables: RecordType <chr>, Survey <chr>, Quarter <int>,
#> #   Country <chr>, Ship <chr>, Gear <chr>, SweepLngt <int>, GearEx <chr>,
#> #   DoorType <lgl>, StNo <chr>, HaulNo <int>, Year <int>, SpecCodeType <chr>,
#> #   SpecCode <int>, SpecVal <fct>, Sex <chr>, TotalNo <dbl>,
#> #   CatIdentifier <int>, NoMeas <int>, SubFactor <dbl>, SubWgt <int>,
#> #   CatCatchWgt <int>, LngtCode <chr>, LngtClass <int>, HLNoAtLngt <dbl>,
#> #   DevStage <chr>, LenMeasType <int>, DateofCalculation <int>, …

# Clean the individual-level (CA) flounder records: keep rows with a positive
# individual weight and length class, add weight_kg (IndWgt / 1000) and a
# length in cm.
# LngtClass is divided by 10 for both LngtCode "." and "0", matching the length
# standardization applied to the haul-level flounder data (hlfleL). Previously
# only "." was converted here, which left code "0" lengths 10 times too large
# (https://vocab.ices.dk/?ref=18).
# `==` is used rather than `%in%` so that a missing LngtCode still yields an NA
# length, which the final length filter drops, as before.
bits_ca_fle <- bits_ca_fle %>%
  drop_na(IndWgt) %>%
  drop_na(LngtClass) %>%
  filter(IndWgt > 0 & LngtClass > 0) %>% # Filter positive length and weight
  mutate(weight_kg = IndWgt / 1000) %>%
  mutate(length_cm = ifelse(LngtCode == "." | LngtCode == "0",
                            LngtClass / 10,
                            LngtClass)) %>%
  # Flag and exclude records from 2008 that have LngtCode "."
  mutate(keep = ifelse(LngtCode == "." & Year == 2008, "N", "Y")) %>%
  filter(keep == "Y") %>%
  filter(length_cm < 70) # Keep lengths below 70 cm only

# Are the rows with a missing length exactly the zero-catch rows? Recode NA
# lengths to -9, flag them, and plot the numbers CPUE against length per flag
hlfleL %>%
  mutate(
    length2 = replace_na(length_cm, -9),
    no_length = ifelse(length2 < 0, "T", "F")
  ) %>%
  ggplot(aes(x = length2, y = CPUEun, color = no_length)) +
  geom_point(alpha = 0.2) +
  facet_wrap(~no_length)
#> Warning: Removed 13 rows containing missing values (geom_point).


# Distinct numbers-CPUE values within each length (NA recoded to -9), sorted
# with the smallest CPUE first
hlfleL %>%
  mutate(length2 = replace_na(length_cm, -9)) %>%
  group_by(length2) %>%
  distinct(CPUEun) %>%
  arrange(CPUEun)
#> # A tibble: 12,344 × 2
#> # Groups:   length2 [253]
#>    CPUEun length2
#>     <dbl>   <dbl>
#>  1  0        -9  
#>  2  0.667    19  
#>  3  0.667    21  
#>  4  0.667    35  
#>  5  0.667    39  
#>  6  0.667    40  
#>  7  0.667    42  
#>  8  0.870    27  
#>  9  0.870    32  
#> 10  0.870    37.5
#> # … with 12,334 more rows

# So all hauls with zero catch have NA length_cm. Are there any NA catches?
t <- drop_na(hlfleL, CPUEun)
# Yes, 11 rows. Remove them from the data
hlfleL <- drop_na(hlfleL, CPUEun)
# Scratch checks: which lengths occur at zero catch, and how many rows have a length
t <- distinct(filter(hlfleL, CPUEun == 0), length_cm)
t <- drop_na(hlfleL, length_cm)

# A zero catch is a row where CPUEun is 0 and length_cm is NA. To get a biomass
# CPUE of 0 (rather than NA) for those rows, length_cm2 holds the length with NA
# replaced by 0. Weight is then predicted from length using a and b, converted
# by /1000, and multiplied with the numbers-CPUE.
hlfleL <- hlfleL %>%
  mutate(
    length_cm2 = replace_na(length_cm, 0),
    weight_kg = (a * length_cm2^b) / 1000,
    CPUEun_kg = weight_kg * CPUEun
  )

# Visual check of the predicted weight against length in the haul data, by year
ggplot(data = hlfleL, mapping = aes(x = weight_kg, y = length_cm2)) +
  geom_point() +
  facet_wrap(~Year)


# Scratch checks (row counts are reported by tidylog)
t <- drop_na(hlfleL, CPUEun_kg) # Biomass catch should contain no NA
t <- filter(hlfleL, CPUEun_kg == 0) # Expect a few percent of rows (rows, not hauls)
t <- drop_na(hlfleL, length_cm2) # Should contain no NA


# Plaice
# Standardize length to cm first; zero-catch handling is inspected afterwards.
# Length codes "." and "0" are treated as mm (https://vocab.ices.dk/?ref=18),
# all other codes are used as they are
hlplaL <- mutate(
  hlplaL,
  length_cm = ifelse(LngtCode %in% c(".", "0"), LngtClass / 10, LngtClass)
)

# Sanity check: no plaice row should have a standardized length of exactly 0
hlplaL %>% filter(length_cm == 0)
#> # A tibble: 0 × 51
#> # … with 51 variables: RecordType <chr>, Survey <chr>, Quarter <int>,
#> #   Country <chr>, Ship <chr>, Gear <chr>, SweepLngt <int>, GearEx <chr>,
#> #   DoorType <lgl>, StNo <chr>, HaulNo <int>, Year <int>, SpecCodeType <chr>,
#> #   SpecCode <int>, SpecVal <fct>, Sex <chr>, TotalNo <dbl>,
#> #   CatIdentifier <int>, NoMeas <int>, SubFactor <dbl>, SubWgt <int>,
#> #   CatCatchWgt <int>, LngtCode <chr>, LngtClass <int>, HLNoAtLngt <dbl>,
#> #   DevStage <chr>, LenMeasType <int>, DateofCalculation <int>, …

# Are the rows without a length exactly the rows with zero catch?
# Missing lengths are recoded to -9 so that they end up in their own panel
hlplaL %>%
  mutate(length2 = replace_na(length_cm, -9)) %>%
  mutate(no_length = ifelse(length2 < 0, "T", "F")) %>%
  ggplot(aes(x = length2, y = CPUEun, color = no_length)) +
  geom_point(alpha = 0.2) +
  facet_wrap(~no_length)
#> Warning: Removed 67 rows containing missing values (geom_point).


# Distinct CPUE values within each length (missing length recoded to -9), smallest first
hlplaL %>%
  mutate(length2 = replace_na(length_cm, -9)) %>%
  group_by(length2) %>%
  distinct(CPUEun) %>%
  arrange(CPUEun)
#> # A tibble: 2,890 × 2
#> # Groups:   length2 [159]
#>    CPUEun length2
#>     <dbl>   <dbl>
#>  1  0          -9
#>  2  0.667      31
#>  3  0.667      32
#>  4  0.667      34
#>  5  0.667      44
#>  6  0.968      24
#>  7  0.968      33
#>  8  0.984      32
#>  9  0.984      36
#> 10  0.984      33
#> # … with 2,880 more rows

# So all hauls with zero catch have NA length_cm. There are also some rows with
# NA catch: count them (scratch) and then remove them
t <- drop_na(hlplaL, CPUEun)
hlplaL <- drop_na(hlplaL, CPUEun)

# A zero catch is a row where CPUEun is 0 and length_cm is NA. To get a biomass
# CPUE of 0 (rather than NA) for those rows, length_cm2 holds the length with NA
# replaced by 0. Weight is then predicted from length using a and b, converted
# by /1000, and multiplied with the numbers-CPUE.
hlplaL <- hlplaL %>%
  mutate(
    length_cm2 = replace_na(length_cm, 0),
    weight_kg = (a * length_cm2^b) / 1000,
    CPUEun_kg = weight_kg * CPUEun
  )

# Visual check of the predicted weight against length in the haul data, by year
ggplot(data = hlplaL, mapping = aes(x = weight_kg, y = length_cm2)) +
  geom_point() +
  facet_wrap(~Year)


# Check the plaice data (scratch results; row counts are reported by tidylog)
t <- hlplaL %>% drop_na(CPUEun_kg) # Should not have any NA in biomass-catch
t <- hlplaL %>% filter(CPUEun_kg == 0) # Should result in a few percent of rows (note this is not proportion of hauls, but rows)
t <- hlplaL %>% drop_na(length_cm2) # Should be no NA


# Dab
# Standardize length to cm first; zero-catch handling is inspected afterwards.
# Length codes "." and "0" are treated as mm (https://vocab.ices.dk/?ref=18),
# all other codes are used as they are
hldabL <- mutate(
  hldabL,
  length_cm = ifelse(LngtCode %in% c(".", "0"), LngtClass / 10, LngtClass)
)

# Sanity check: no dab row should have a standardized length of exactly 0
hldabL %>% filter(length_cm == 0)
#> # A tibble: 0 × 51
#> # … with 51 variables: RecordType <chr>, Survey <chr>, Quarter <int>,
#> #   Country <chr>, Ship <chr>, Gear <chr>, SweepLngt <int>, GearEx <chr>,
#> #   DoorType <lgl>, StNo <chr>, HaulNo <int>, Year <int>, SpecCodeType <chr>,
#> #   SpecCode <int>, SpecVal <fct>, Sex <chr>, TotalNo <dbl>,
#> #   CatIdentifier <int>, NoMeas <int>, SubFactor <dbl>, SubWgt <int>,
#> #   CatCatchWgt <int>, LngtCode <chr>, LngtClass <int>, HLNoAtLngt <dbl>,
#> #   DevStage <chr>, LenMeasType <int>, DateofCalculation <int>, …

# Are the rows without a length exactly the rows with zero catch?
# Missing lengths are recoded to -9 so that they end up in their own panel
hldabL %>%
  mutate(length2 = replace_na(length_cm, -9)) %>%
  mutate(no_length = ifelse(length2 < 0, "T", "F")) %>%
  ggplot(aes(x = length2, y = CPUEun, color = no_length)) +
  geom_point(alpha = 0.2) +
  facet_wrap(~no_length)


# Distinct CPUE values within each length (missing length recoded to -9), smallest first
hldabL %>%
  mutate(length2 = replace_na(length_cm, -9)) %>%
  group_by(length2) %>%
  distinct(CPUEun) %>%
  arrange(CPUEun)
#> # A tibble: 1,014 × 2
#> # Groups:   length2 [36]
#>    CPUEun length2
#>     <dbl>   <dbl>
#>  1  0          -9
#>  2  0.667      22
#>  3  1          14
#>  4  1          22
#>  5  1          21
#>  6  1          25
#>  7  1          27
#>  8  1          23
#>  9  1          29
#> 10  1          19
#> # … with 1,004 more rows

# So all hauls with zero catch have NA length_cm. Scratch check for NA catches
# (tidylog reports how many rows drop_na removes)
t <- drop_na(hldabL, CPUEun)

# A zero catch is a row where CPUEun is 0 and length_cm is NA. To get a biomass
# CPUE of 0 (rather than NA) for those rows, length_cm2 holds the length with NA
# replaced by 0. Weight is then predicted from length using a and b, converted
# by /1000, and multiplied with the numbers-CPUE.
hldabL <- hldabL %>%
  mutate(
    length_cm2 = replace_na(length_cm, 0),
    weight_kg = (a * length_cm2^b) / 1000,
    CPUEun_kg = weight_kg * CPUEun
  )

# Visual check of the predicted weight against length in the haul data, by year
ggplot(data = hldabL, mapping = aes(x = weight_kg, y = length_cm2)) +
  geom_point() +
  facet_wrap(~Year)


# Scratch checks (row counts are reported by tidylog)
t <- drop_na(hldabL, CPUEun_kg) # Biomass catch should contain no NA
t <- filter(hldabL, CPUEun_kg == 0) # Expect a few percent of rows (rows, not hauls)
t <- drop_na(hldabL, length_cm2) # Should contain no NA
# Proportion of zero-catch hauls per year and quarter (cod)
cod_0plot <- hlcodL %>%
  # Numbers-CPUE summed over all rows of a haul
  group_by(haul.id, Year, Quarter) %>%
  summarise(CPUEun_haul = sum(CPUEun)) %>% 
  ungroup() %>% 
  # Hauls whose summed CPUEun is NA get zero_catch = NA; after pivoting they sit
  # in a separate column named `NA` and do not enter the proportion
  mutate(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N")) %>% 
  # Number of hauls per year, quarter and zero-catch category
  group_by(Year, Quarter, zero_catch) %>% 
  summarise(n = n()) %>% 
  ungroup() %>% 
  # values_fill = 0L: a year-quarter that lacks a category has 0 hauls in it,
  # not NA, so the proportion becomes 0 or 1 instead of NA
  pivot_wider(names_from = zero_catch, values_from = n, values_fill = 0L) %>% 
  mutate(prop_zero_catch_hauls = Y/(N+Y),
         species = "Cod") 
#> group_by: 3 grouping variables (haul.id, Year, Quarter)
#> summarise: now 12,254 rows and 4 columns, 2 group variables remaining (haul.id, Year)
#> ungroup: no grouping variables
#> mutate: new variable 'zero_catch' (character) with 3 unique values and <1% NA
#> group_by: 3 grouping variables (Year, Quarter, zero_catch)
#> summarise: now 125 rows and 4 columns, 2 group variables remaining (Year, Quarter)
#> ungroup: no grouping variables
#> pivot_wider: reorganized (zero_catch, n) into (N, Y, NA) [was 125x4, now 68x5]
#> mutate: new variable 'prop_zero_catch_hauls' (double) with 56 unique values and 18% NA
#>         new variable 'species' (character) with one unique value and 0% NA

# Proportion of zero-catch hauls per year and quarter (flounder)
fle_0plot <- hlfleL %>%
  # Numbers-CPUE summed over all rows of a haul
  group_by(haul.id, Year, Quarter) %>%
  summarise(CPUEun_haul = sum(CPUEun)) %>% 
  ungroup() %>% 
  mutate(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N")) %>% 
  # Number of hauls per year, quarter and zero-catch category
  group_by(Year, Quarter, zero_catch) %>% 
  summarise(n = n()) %>% 
  ungroup() %>% 
  # values_fill = 0L: a year-quarter that lacks a category has 0 hauls in it,
  # not NA, so the proportion becomes 0 or 1 instead of NA
  pivot_wider(names_from = zero_catch, values_from = n, values_fill = 0L) %>% 
  mutate(prop_zero_catch_hauls = Y/(N+Y),
         species = "Flounder")
#> group_by: 3 grouping variables (haul.id, Year, Quarter)
#> summarise: now 12,003 rows and 4 columns, 2 group variables remaining (haul.id, Year)
#> ungroup: no grouping variables
#> mutate: new variable 'zero_catch' (character) with 2 unique values and 0% NA
#> group_by: 3 grouping variables (Year, Quarter, zero_catch)
#> summarise: now 124 rows and 4 columns, 2 group variables remaining (Year, Quarter)
#> ungroup: no grouping variables
#> pivot_wider: reorganized (zero_catch, n) into (N, Y) [was 124x4, now 68x4]
#> mutate: new variable 'prop_zero_catch_hauls' (double) with 57 unique values and 18% NA
#>         new variable 'species' (character) with one unique value and 0% NA

# Proportion of zero-catch hauls per year and quarter (plaice)
pla_0plot <- hlplaL %>%
  # Numbers-CPUE summed over all rows of a haul
  group_by(haul.id, Year, Quarter) %>%
  summarise(CPUEun_haul = sum(CPUEun)) %>% 
  ungroup() %>% 
  mutate(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N")) %>% 
  # Number of hauls per year, quarter and zero-catch category
  group_by(Year, Quarter, zero_catch) %>% 
  summarise(n = n()) %>% 
  ungroup() %>% 
  # values_fill = 0L: a year-quarter that lacks a category has 0 hauls in it,
  # not NA, so the proportion becomes 0 or 1 instead of NA
  pivot_wider(names_from = zero_catch, values_from = n, values_fill = 0L) %>% 
  mutate(prop_zero_catch_hauls = Y/(N+Y),
         species = "Plaice")
#> group_by: 3 grouping variables (haul.id, Year, Quarter)
#> summarise: now 11,480 rows and 4 columns, 2 group variables remaining (haul.id, Year)
#> ungroup: no grouping variables
#> mutate: new variable 'zero_catch' (character) with 2 unique values and 0% NA
#> group_by: 3 grouping variables (Year, Quarter, zero_catch)
#> summarise: now 124 rows and 4 columns, 2 group variables remaining (Year, Quarter)
#> ungroup: no grouping variables
#> pivot_wider: reorganized (zero_catch, n) into (N, Y) [was 124x4, now 68x4]
#> mutate: new variable 'prop_zero_catch_hauls' (double) with 55 unique values and 18% NA
#>         new variable 'species' (character) with one unique value and 0% NA

# Proportion of zero-catch hauls per year and quarter (dab)
dab_0plot <- hldabL %>%
  # Numbers-CPUE summed over all rows of a haul
  group_by(haul.id, Year, Quarter) %>%
  summarise(CPUEun_haul = sum(CPUEun)) %>% 
  ungroup() %>% 
  mutate(zero_catch = ifelse(CPUEun_haul == 0, "Y", "N")) %>% 
  # Number of hauls per year, quarter and zero-catch category
  group_by(Year, Quarter, zero_catch) %>% 
  summarise(n = n()) %>% 
  ungroup() %>% 
  # values_fill = 0L: a year-quarter that lacks a category has 0 hauls in it,
  # not NA, so the proportion becomes 0 or 1 instead of NA
  pivot_wider(names_from = zero_catch, values_from = n, values_fill = 0L) %>% 
  mutate(prop_zero_catch_hauls = Y/(N+Y),
         species = "Dab")
#> group_by: 3 grouping variables (haul.id, Year, Quarter)
#> summarise: now 11,477 rows and 4 columns, 2 group variables remaining (haul.id, Year)
#> ungroup: no grouping variables
#> mutate: new variable 'zero_catch' (character) with 2 unique values and 0% NA
#> group_by: 3 grouping variables (Year, Quarter, zero_catch)
#> summarise: now 124 rows and 4 columns, 2 group variables remaining (Year, Quarter)
#> ungroup: no grouping variables
#> pivot_wider: reorganized (zero_catch, n) into (N, Y) [was 124x4, now 68x4]
#> mutate: new variable 'prop_zero_catch_hauls' (double) with 57 unique values and 18% NA
#>         new variable 'species' (character) with one unique value and 0% NA

Standardize according to Orio

Target unit: kg of fish caught by trawling for 1 h over a standard bottom swept area of 0.45 km² using a TVL trawl with 75 m sweeps at the standard speed of three knots.

# Remove hauls done with the TVL gear with a SweepLngt < 50 (these are calibration hauls, pers. com. Anders & Ale)
# Hauls with a missing SweepLngt are kept: NA is temporarily replaced by 50, which is not < 50
# Also remove pelagic gear (PEL). The helper columns are dropped again at the end
hlcodL <- hlcodL %>%
  mutate(SweepLngt2 = replace_na(SweepLngt, 50),
         keep = ifelse(Gear == "TVL" & SweepLngt2 < 50, "N", "Y")) %>%
  filter(keep == "Y", !Gear == "PEL") %>%
  dplyr::select(-c(keep, SweepLngt2))

hlfleL <- hlfleL %>%
  mutate(SweepLngt2 = replace_na(SweepLngt, 50),
         keep = ifelse(Gear == "TVL" & SweepLngt2 < 50, "N", "Y")) %>%
  filter(keep == "Y", !Gear == "PEL") %>%
  dplyr::select(-c(keep, SweepLngt2))

hlplaL <- hlplaL %>%
  mutate(SweepLngt2 = replace_na(SweepLngt, 50),
         keep = ifelse(Gear == "TVL" & SweepLngt2 < 50, "N", "Y")) %>%
  filter(keep == "Y", !Gear == "PEL") %>%
  dplyr::select(-c(keep, SweepLngt2))

hldabL <- hldabL %>%
  mutate(SweepLngt2 = replace_na(SweepLngt, 50),
         keep = ifelse(Gear == "TVL" & SweepLngt2 < 50, "N", "Y")) %>%
  filter(keep == "Y", !Gear == "PEL") %>%
  dplyr::select(-c(keep, SweepLngt2))

# Add in RS and RSA-values from the sweep file
# CPUE should be multiplied with RS and RSA to standardize to a relative speed and gear dimension.
# There is not a single file with all RS and RSA values. Instead they come in three files:
# - sweep (non-Swedish hauls between 1991-2016)
# - + calculated based on trawl speed and gear dimensions.
# I will join in the RS and RSA values from all sources, then standardize and filter
# away non-standardized hauls
# sort(unique(sweep$Year))
# sort(unique(sweep$Country))

# Since I don't have the sweep data for Swedish data, I have to calculate it from scratch using the 
# equation in Orio's spreadsheet

# First I will join in the sweep data.
# The haul id column of the sweep file arrives with a mangled name (e.g.
# "ï..haul.id", presumably a byte-order-mark artefact of reading the csv). How it
# is mangled depends on platform and encoding, so the column is matched on its
# "haul.id" suffix instead of on the hard-coded mangled name.
sweep_sel <- sweep %>%
  rename(haul.id = matches("haul\\.id$")) %>%
  dplyr::select(haul.id, RSA, RS)

# Join on the haul id explicitly rather than on whatever columns happen to be shared
hlcodL2 <- left_join(hlcodL, sweep_sel, by = "haul.id")
hlfleL2 <- left_join(hlfleL, sweep_sel, by = "haul.id")
hlplaL2 <- left_join(hlplaL, sweep_sel, by = "haul.id")
hldabL2 <- left_join(hldabL, sweep_sel, by = "haul.id")

# Mark the correction factors that came from the sweep file with a _sweep suffix
# and make sure they are numeric
hlcodL2 <- hlcodL2 %>%
  rename(RS_sweep = RS, RSA_sweep = RSA) %>%
  mutate(across(c(RS_sweep, RSA_sweep), as.numeric))

hlfleL2 <- hlfleL2 %>%
  rename(RS_sweep = RS, RSA_sweep = RSA) %>%
  mutate(across(c(RS_sweep, RSA_sweep), as.numeric))

hlplaL2 <- hlplaL2 %>%
  rename(RS_sweep = RS, RSA_sweep = RSA) %>%
  mutate(across(c(RS_sweep, RSA_sweep), as.numeric))

hldabL2 <- hldabL2 %>%
  rename(RS_sweep = RS, RSA_sweep = RSA) %>%
  mutate(across(c(RS_sweep, RSA_sweep), as.numeric))


# RS and RSA will be calculated in the catch data with Ale's equation from the sweep file.
# First look at the values present in the input columns
hlcodL2$GroundSpeed %>% unique() %>% sort()
#>  [1] -9.0  0.1  0.2  0.8  1.7  1.8  2.0  2.1  2.2  2.3  2.4  2.5  2.6  2.7  2.8
#> [16]  2.9  3.0  3.1  3.2  3.3  3.4  3.5  3.6  3.7  3.8  3.9  4.0  4.1  4.2  4.3
#> [31]  4.4  4.5  4.6  4.7  4.9  5.0  5.2  5.3  5.4  5.5  5.6  5.7  5.9  6.0  6.1
#> [46]  6.2  6.3  6.6  6.7  6.8  6.9  7.1  7.3  8.6
hlcodL2$Fishing.line %>% unique() %>% sort()
#> [1]  -9.00  28.00  33.22  36.00  39.80  63.46  83.00 160.00
hlcodL2$SweepLngt %>% unique() %>% sort()
#>  [1]   0  40  50  60  75  87  90  95 100 110 135 180 185 200 203 225 235

# -9 is the missing-value code in the columns used for the calculations. Turn it
# into NA first, so that it cannot produce real-looking numbers that are wrong
hlcodL2 <- hlcodL2 %>%
  mutate(across(c(GroundSpeed, Fishing.line, SweepLngt),
                ~ ifelse(.x == -9, NA, .x)))

hlfleL2 <- hlfleL2 %>%
  mutate(across(c(GroundSpeed, Fishing.line, SweepLngt),
                ~ ifelse(.x == -9, NA, .x)))

hlplaL2 <- hlplaL2 %>%
  mutate(across(c(GroundSpeed, Fishing.line, SweepLngt),
                ~ ifelse(.x == -9, NA, .x)))

hldabL2 <- hldabL2 %>%
  mutate(across(c(GroundSpeed, Fishing.line, SweepLngt),
                ~ ifelse(.x == -9, NA, .x)))


# First 50 distinct speed / fishing line / sweep length combinations in quarter 1
hlcodL2 %>%
  filter(Quarter == 1) %>%
  distinct(GroundSpeed, Fishing.line, SweepLngt) %>%
  as.data.frame() %>%
  head(n = 50)
#>    SweepLngt Fishing.line GroundSpeed
#> 1         60           NA          NA
#> 2        110           NA          NA
#> 3         NA         36.0         3.8
#> 4         NA         36.0         3.6
#> 5         NA         36.0         4.0
#> 6         NA         36.0         4.6
#> 7         NA         36.0         3.4
#> 8         NA         36.0         4.2
#> 9         NA         36.0         3.2
#> 10        NA         36.0         2.6
#> 11        NA         36.0         2.8
#> 12        50        160.0         3.1
#> 13        50        160.0         2.9
#> 14        50        160.0         3.0
#> 15        50        160.0         2.6
#> 16        50        160.0         3.2
#> 17       185         83.0         3.1
#> 18       185         83.0         3.0
#> 19        NA         39.8          NA
#> 20       185         83.0         2.8
#> 21       185         83.0         3.4
#> 22       185         83.0         3.3
#> 23       185         83.0         3.5
#> 24       185         83.0         3.2
#> 25       185         83.0         2.7
#> 26       185         83.0         2.9
#> 27        NA         36.0         4.4
#> 28        NA         36.0         3.0
#> 29        NA         36.0         2.4
#> 30       180         83.0         3.1
#> 31       180         83.0         3.2
#> 32       180         83.0         3.0
#> 33       180         83.0         3.3
#> 34        50        160.0         3.4
#> 35       180         83.0         3.4
#> 36       180         83.0         3.5
#> 37       180         83.0         2.8
#> 38       180         83.0         3.9
#> 39        50        160.0         3.3
#> 40        NA           NA          NA
#> 41        NA         36.0         3.7
#> 42        NA         36.0         3.1
#> 43        NA           NA         3.2
#> 44        NA           NA         3.3
#> 45        NA           NA         3.6
#> 46        NA           NA         3.4
#> 47        90        160.0         3.4
#> 48        NA           NA         3.0
#> 49        NA           NA         3.8
#> 50        NA           NA         3.9

# First 50 distinct speed / fishing line / sweep length combinations in quarter 4
hlcodL2 %>%
  filter(Quarter == 4) %>%
  distinct(GroundSpeed, Fishing.line, SweepLngt) %>%
  as.data.frame() %>%
  head(n = 50)
#>    SweepLngt Fishing.line GroundSpeed
#> 1        180           83         2.9
#> 2         NA           36         3.8
#> 3        180           83         2.8
#> 4        180           83         3.1
#> 5         NA           36         3.4
#> 6         NA           36         4.0
#> 7         NA           36         4.2
#> 8        180           83         3.2
#> 9        180           83         3.3
#> 10        NA           36         3.2
#> 11        NA           36         3.6
#> 12       180           83         3.0
#> 13        NA           36         3.0
#> 14       185           83         2.8
#> 15       185           83         3.0
#> 16       185           83         3.2
#> 17       185           83         3.3
#> 18       185           83         3.1
#> 19       185           83         3.5
#> 20       185           83         3.4
#> 21       185           83         3.6
#> 22        NA           36          NA
#> 23       203           83         3.6
#> 24       203           83         3.7
#> 25       203           83         3.5
#> 26       203           83         3.8
#> 27       203           83         3.9
#> 28       203           83         3.4
#> 29       100          160         3.4
#> 30       100           83         3.5
#> 31       100           83         3.4
#> 32       225           83         3.6
#> 33       225           83         3.5
#> 34       100           83         4.0
#> 35       100          160         3.5
#> 36       100           83         3.7
#> 37       100           83         3.3
#> 38       100           83         3.2
#> 39       100           83         3.6
#> 40       225           83         3.3
#> 41       225           83         3.4
#> 42       180           83         3.6
#> 43        NA           28          NA
#> 44       225           83         3.2
#> 45       180           83         3.4
#> 46       180           83         3.5
#> 47       225           83         3.7
#> 48       225           83         3.8
#> 49       185           83         3.8
#> 50       225           83         3.9

# Hmm, Q1 has at least one of the RS or RSA variables as NAs. Will be difficult to standardize!
# Hope the correction factors are present in Ales conversion data

# Now calculate correction factors
# The same six columns are added to each species' data:
# - RS_x: 3 divided by the recorded ground speed (3 knots is the standard speed)
# - Horizontal.opening..m.: 0.67 times the fishing line
# - Swep.one.side..after.formula...meter: sweep length times sin(15 degrees)
# - Size.final..m: horizontal opening plus the sweep term for both sides
# - Swept.area: Size.final..m * 3 * 1860 / 1e6 (presumably width in m times the
#   distance covered in 1 h at 3 knots, giving km^2 -- TODO confirm the 1860 factor)
# - RSA_x: 0.45388309675081 divided by the swept area. The constant matches what
#   this formula gives for Fishing.line = 63.46 and SweepLngt = 75, presumably the
#   standard TVL configuration -- TODO confirm
# A row with NA in any input column gets NA in the columns derived from it
hlcodL2 <- hlcodL2 %>% mutate(RS_x = 3/GroundSpeed,
                              Horizontal.opening..m. = Fishing.line*0.67,
                              Swep.one.side..after.formula...meter = 0.258819045*SweepLngt, # SIN(RADIANS(15))
                              Size.final..m = Horizontal.opening..m. + (Swep.one.side..after.formula...meter*2),
                              Swept.area = (Size.final..m*3*1860)/1000000,
                              RSA_x = 0.45388309675081/Swept.area)

# Flounder: same calculation
hlfleL2 <- hlfleL2 %>% mutate(RS_x = 3/GroundSpeed,
                              Horizontal.opening..m. = Fishing.line*0.67,
                              Swep.one.side..after.formula...meter = 0.258819045*SweepLngt, # SIN(RADIANS(15))
                              Size.final..m = Horizontal.opening..m. + (Swep.one.side..after.formula...meter*2),
                              Swept.area = (Size.final..m*3*1860)/1000000,
                              RSA_x = 0.45388309675081/Swept.area)

# Plaice: same calculation
hlplaL2 <- hlplaL2 %>% mutate(RS_x = 3/GroundSpeed,
                              Horizontal.opening..m. = Fishing.line*0.67,
                              Swep.one.side..after.formula...meter = 0.258819045*SweepLngt, # SIN(RADIANS(15))
                              Size.final..m = Horizontal.opening..m. + (Swep.one.side..after.formula...meter*2),
                              Swept.area = (Size.final..m*3*1860)/1000000,
                              RSA_x = 0.45388309675081/Swept.area)

# Dab: same calculation
hldabL2 <- hldabL2 %>% mutate(RS_x = 3/GroundSpeed,
                              Horizontal.opening..m. = Fishing.line*0.67,
                              Swep.one.side..after.formula...meter = 0.258819045*SweepLngt, # SIN(RADIANS(15))
                              Size.final..m = Horizontal.opening..m. + (Swep.one.side..after.formula...meter*2),
                              Swept.area = (Size.final..m*3*1860)/1000000,
                              RSA_x = 0.45388309675081/Swept.area)

# Check that the equation is correct: recompute RSA inside the sweep file and
# plot it against the RSA value stored there (points should fall on the 1:1 line)
sweep <- sweep %>%
  mutate(Horizontal.opening..m.2 = Fishing.line*0.67) %>%
  mutate(Swep.one.side..after.formula...meter2 = 0.258819045*SweepLngt) %>% # SIN(RADIANS(15))
  mutate(Size.final..m2 = Horizontal.opening..m.2 + (Swep.one.side..after.formula...meter2*2)) %>%
  mutate(Swept.area2 = (Size.final..m2*3*1860)/1000000) %>%
  mutate(RSA_x = 0.45388309675081/Swept.area2)

sweep %>%
  drop_na() %>%
  ggplot(aes(x = as.numeric(RSA), y = RSA_x)) +
  geom_point() +
  geom_abline(intercept = 0, slope = 1)

# Yes it's the same

# Replace NA in the four correction-factor columns with -1/3, so that missing
# values can be filtered out easily later (-1/3 is what 3/-9 gives, -9 being the
# ICES code for a missing value)
# sort(unique(hlcodL2$RS_x))
# sort(unique(hlcodL2$RSA_x))

hlcodL2[c("RS_x", "RS_sweep", "RSA_x", "RSA_sweep")] <-
  lapply(hlcodL2[c("RS_x", "RS_sweep", "RSA_x", "RSA_sweep")],
         function(x) replace(x, is.na(x), -1/3))

hlfleL2[c("RS_x", "RS_sweep", "RSA_x", "RSA_sweep")] <-
  lapply(hlfleL2[c("RS_x", "RS_sweep", "RSA_x", "RSA_sweep")],
         function(x) replace(x, is.na(x), -1/3))

hlplaL2[c("RS_x", "RS_sweep", "RSA_x", "RSA_sweep")] <-
  lapply(hlplaL2[c("RS_x", "RS_sweep", "RSA_x", "RSA_sweep")],
         function(x) replace(x, is.na(x), -1/3))

hldabL2[c("RS_x", "RS_sweep", "RSA_x", "RSA_sweep")] <-
  lapply(hldabL2[c("RS_x", "RS_sweep", "RSA_x", "RSA_sweep")],
         function(x) replace(x, is.na(x), -1/3))

# Compare the different correction factors (calculated vs imported from the sweep file)
# Cod: calculated factors in the top row, sweep-file factors in the bottom row
p1 <- hlcodL2 %>%
  filter(RS_x > 0) %>%
  ggplot(aes(x = RS_x)) +
  geom_histogram() +
  xlim(0.4, 1.76)
p2 <- ggplot(data = hlcodL2, mapping = aes(x = RSA_x)) + geom_histogram()
p3 <- ggplot(data = hlcodL2, mapping = aes(x = RS_sweep)) + geom_histogram()
p4 <- ggplot(data = hlcodL2, mapping = aes(x = RSA_sweep)) + geom_histogram()

(p1 + p2) / (p3 + p4)
#> Warning: Removed 284 rows containing non-finite values (stat_bin).
#> Warning: Removed 2 rows containing missing values (geom_bar).


# Flounder: calculated factors in the top row, sweep-file factors in the bottom row
p5 <- hlfleL2 %>%
  filter(RS_x > 0) %>%
  ggplot(aes(x = RS_x)) +
  geom_histogram() +
  xlim(0.4, 1.76)
p6 <- ggplot(data = hlfleL2, mapping = aes(x = RSA_x)) + geom_histogram()
p7 <- ggplot(data = hlfleL2, mapping = aes(x = RS_sweep)) + geom_histogram()
p8 <- ggplot(data = hlfleL2, mapping = aes(x = RSA_sweep)) + geom_histogram()

(p5 + p6) / (p7 + p8)
#> Warning: Removed 115 rows containing non-finite values (stat_bin).
#> Removed 2 rows containing missing values (geom_bar).


# Plaice: calculated factors in the top row, sweep-file factors in the bottom row
p9 <- hlplaL2 %>%
  filter(RS_x > 0) %>%
  ggplot(aes(x = RS_x)) +
  geom_histogram() +
  xlim(0.4, 1.76)
p10 <- ggplot(data = hlplaL2, mapping = aes(x = RSA_x)) + geom_histogram()
p11 <- ggplot(data = hlplaL2, mapping = aes(x = RS_sweep)) + geom_histogram()
p12 <- ggplot(data = hlplaL2, mapping = aes(x = RSA_sweep)) + geom_histogram()

(p9 + p10) / (p11 + p12)
#> Warning: Removed 56 rows containing non-finite values (stat_bin).
#> Removed 2 rows containing missing values (geom_bar).


# Dab: calculated factors in the top row, sweep-file factors in the bottom row
p13 <- hldabL2 %>%
  filter(RS_x > 0) %>%
  ggplot(aes(x = RS_x)) +
  geom_histogram() +
  xlim(0.4, 1.76)
p14 <- ggplot(data = hldabL2, mapping = aes(x = RSA_x)) + geom_histogram()
p15 <- ggplot(data = hldabL2, mapping = aes(x = RS_sweep)) + geom_histogram()
p16 <- ggplot(data = hldabL2, mapping = aes(x = RSA_sweep)) + geom_histogram()

(p13 + p14) / (p15 + p16)
#> Warning: Removed 10 rows containing non-finite values (stat_bin).
#> Removed 2 rows containing missing values (geom_bar).


# Why are there RSA values smaller than one? (either the sweeps are longer or the gear is larger (GOV))
# The equation itself reproduces the RSA entered in the sweep file, so it is correct.
# Which gears and sweep lengths have 0 < RSA < 1?
hlcodL2 %>%
  filter(RSA_x > 0, RSA_x < 1) %>%
  dplyr::select(
    Year, Country, Ship, Gear, haul.id,
    Horizontal.opening..m., Fishing.line,
    Swep.one.side..after.formula...meter, SweepLngt,
    Size.final..m, Swept.area, RSA_x
  ) %>%
  ggplot(aes(x = RSA_x, fill = factor(SweepLngt))) +
  geom_histogram() +
  facet_wrap(~Gear, ncol = 1)


# Is there more than one RS or RSA value per haul, or is it "either this or that"?
# Only rows that are positive in both columns are compared; red line is 1:1
hlcodL2 %>%
  filter(RS_x > 0, RS_sweep > 0) %>%
  ggplot(aes(x = RS_x, y = RS_sweep)) +
  geom_point() +
  geom_abline(aes(intercept = 0, slope = 1), color = "red")


hlcodL2 %>%
  filter(RSA_x > 0, RSA_sweep > 0) %>%
  ggplot(aes(x = RSA_x, y = RSA_sweep)) +
  geom_point() +
  geom_abline(aes(intercept = 0, slope = 1), color = "red")


hlfleL2 %>%
  filter(RS_x > 0, RS_sweep > 0) %>%
  ggplot(aes(x = RS_x, y = RS_sweep)) +
  geom_point() +
  geom_abline(aes(intercept = 0, slope = 1), color = "red")


hlfleL2 %>%
  filter(RSA_x > 0, RSA_sweep > 0) %>%
  ggplot(aes(x = RSA_x, y = RSA_sweep)) +
  geom_point() +
  geom_abline(aes(intercept = 0, slope = 1), color = "red")


# Ok, there's one odd RS_x that is larger than 3. It didn't catch anything and speed is 0.8! Will remove
# Rows carrying the -1/3 missing-value code pass this filter and are dealt with later
hlcodL2 <- hlcodL2 %>% filter(RS_x < 3)
hlfleL2 <- hlfleL2 %>% filter(RS_x < 3)
# Apply the same rule to plaice and dab, so that implausibly slow hauls are
# excluded for all four species and not only for cod and flounder
hlplaL2 <- hlplaL2 %>% filter(RS_x < 3)
hldabL2 <- hldabL2 %>% filter(RS_x < 3)

# Plot again
hlcodL2 %>% filter(RS_x > 0 & RS_sweep > 0) %>% ggplot(., aes(RS_x, RS_sweep)) +
  geom_point() + geom_abline(aes(intercept = 0, slope = 1), color = "red")


hlfleL2 %>% filter(RS_x > 0 & RS_sweep > 0) %>% ggplot(., aes(RS_x, RS_sweep)) +
  geom_point() + geom_abline(aes(intercept = 0, slope = 1), color = "red")


# The two sources largely agree where they overlap. Where they differ, RS_sweep is used.
# Build a single RS and a single RSA column

# Cod
# Order of preference: the sweep-file value if positive, otherwise the calculated
# value if positive, otherwise -99. There are no NA in these columns (missing
# values are coded -1/3), so filtering on positive values removes every haul
# that cannot be standardized
hlcodL3 <- hlcodL2 %>%
  mutate(
    RS = ifelse(RS_sweep > 0, RS_sweep, ifelse(RS_x > 0, RS_x, -99)),
    RSA = ifelse(RSA_sweep > 0, RSA_sweep, ifelse(RSA_x > 0, RSA_x, -99))
  ) %>%
  filter(RS > 0, RSA > 0) %>%
  mutate(RSRSA = RS * RSA)

# Plot
ggplot(data = hlcodL3, mapping = aes(x = RSRSA)) + geom_histogram()



# Flounder
# Years with Latvian (LAT) data before the standardization filter
hlfleL2 %>%
  filter(Country == "LAT") %>%
  distinct(Year) %>%
  arrange(Year)
#> # A tibble: 28 × 1
#>     Year
#>    <int>
#>  1  1993
#>  2  1994
#>  3  1995
#>  4  1996
#>  5  1997
#>  6  1998
#>  7  1999
#>  8  2000
#>  9  2001
#> 10  2002
#> # … with 18 more rows

# Use the sweep-file value if positive, otherwise fall back on the calculated
# value. There are no NA in these columns (missing values are coded -1/3), so
# filtering on positive values removes every haul that cannot be standardized
hlfleL3 <- hlfleL2 %>%
  mutate(
    RS = ifelse(RS_sweep > 0, RS_sweep, RS_x),
    RSA = ifelse(RSA_sweep > 0, RSA_sweep, RSA_x)
  ) %>%
  filter(RS > 0, RSA > 0) %>%
  mutate(RSRSA = RS * RSA)

# Plot
ggplot(data = hlfleL3, mapping = aes(x = RSRSA)) + geom_histogram()


# Test how many years of LAT data I miss out on because I can't standardize it.
# hlfleL2 %>%
#   mutate(RS = -999,
#          RS = ifelse(RS_sweep > 0, RS_sweep, RS),
#          RS = ifelse(RS < 0, RS_x, RS)) %>% # Note that there are no NA i RS_x. This replaces all non-RS_sweep values -0.3, so I can filter positive later
#   filter(RS > 0) %>% 
#   filter(Country == "LAT") %>% 
#   distinct(Year) %>% 
#   arrange(Year)
#   
# hlfleL2 %>%
#   mutate(RSA = -999,
#          RSA = ifelse(RSA_sweep > 0, RSA_sweep, RSA),
#          RSA = ifelse(RSA < 0, RSA_x, RSA)) %>% 
#   filter(RSA > 0) %>% 
#   filter(Country == "LAT") %>% 
#   distinct(Year) %>% 
#   arrange(Year)

# Plaice
# Order of preference: the sweep-file value if positive, otherwise the calculated
# value if positive, otherwise -99. There are no NA in these columns (missing
# values are coded -1/3), so filtering on positive values removes every haul
# that cannot be standardized
hlplaL3 <- hlplaL2 %>%
  mutate(
    RS = ifelse(RS_sweep > 0, RS_sweep, ifelse(RS_x > 0, RS_x, -99)),
    RSA = ifelse(RSA_sweep > 0, RSA_sweep, ifelse(RSA_x > 0, RSA_x, -99))
  ) %>%
  filter(RS > 0, RSA > 0) %>%
  mutate(RSRSA = RS * RSA)

# Plot
ggplot(data = hlplaL3, mapping = aes(x = RSRSA)) + geom_histogram()



# Dab
# Order of preference: the sweep-file value if positive, otherwise the calculated
# value if positive, otherwise -99. There are no NA in these columns (missing
# values are coded -1/3), so filtering on positive values removes every haul
# that cannot be standardized
hldabL3 <- hldabL2 %>%
  mutate(
    RS = ifelse(RS_sweep > 0, RS_sweep, ifelse(RS_x > 0, RS_x, -99)),
    RSA = ifelse(RSA_sweep > 0, RSA_sweep, ifelse(RSA_x > 0, RSA_x, -99))
  ) %>%
  filter(RS > 0, RSA > 0) %>%
  mutate(RSRSA = RS * RSA)

# Plot
ggplot(data = hldabL3, mapping = aes(x = RSRSA)) + geom_histogram()



# Standardize: multiply the unstandardized CPUE (in biomass and in numbers) with
# the speed (RS) and swept-area (RSA) correction factors
hlcodL3 <- mutate(hlcodL3,
                  CPUEst_kg = CPUEun_kg * RS * RSA,
                  CPUEst = CPUEun * RS * RSA)

hlfleL3 <- mutate(hlfleL3,
                  CPUEst_kg = CPUEun_kg * RS * RSA,
                  CPUEst = CPUEun * RS * RSA)

hlplaL3 <- mutate(hlplaL3,
                  CPUEst_kg = CPUEun_kg * RS * RSA,
                  CPUEst = CPUEun * RS * RSA)

hldabL3 <- mutate(hldabL3,
                  CPUEst_kg = CPUEun_kg * RS * RSA,
                  CPUEst = CPUEun * RS * RSA)


# Cod: the standardized CPUEs should contain no NA (a single FALSE) and have a minimum of 0
unique(is.na(hlcodL3$CPUEst_kg))
#> [1] FALSE
unique(is.na(hlcodL3$CPUEst))
#> [1] FALSE
min(hlcodL3$CPUEst_kg)
#> [1] 0
min(hlcodL3$CPUEst)
#> [1] 0

# Flounder: the standardized CPUEs should contain no NA (a single FALSE) and have a minimum of 0
unique(is.na(hlfleL3$CPUEst_kg)) # Remove any NA's here (the output below shows none, so drop_na removes nothing)
#> [1] FALSE
hlfleL3 <- hlfleL3 %>% drop_na(CPUEst_kg)
unique(is.na(hlfleL3$CPUEst))
#> [1] FALSE
min(hlfleL3$CPUEst_kg) 
#> [1] 0
min(hlfleL3$CPUEst)
#> [1] 0

# Plaice: the standardized CPUEs should contain no NA (a single FALSE) and have a minimum of 0
unique(is.na(hlplaL3$CPUEst_kg))
#> [1] FALSE
unique(is.na(hlplaL3$CPUEst))
#> [1] FALSE
min(hlplaL3$CPUEst_kg)
#> [1] 0
min(hlplaL3$CPUEst)
#> [1] 0

# Dab: the standardized CPUEs should contain no NA (a single FALSE) and have a minimum of 0
unique(is.na(hldabL3$CPUEst_kg))
#> [1] FALSE
unique(is.na(hldabL3$CPUEst))
#> [1] FALSE
min(hldabL3$CPUEst_kg)
#> [1] 0
min(hldabL3$CPUEst)
#> [1] 0

# Now calculate CPUE PER LENGTH CLASS, then create the new unit, i.e.: convert from kg of fish caught by trawling for 1 h a standard bottom swept area of 0.45km2 (using a TVL trawl with 75 m sweeps at the standard speed of three knots) to... kg of fish per km^2 by dividing with 0.45

# Compare the two length columns for cod: length_cm in the top panel and
# length_cm2 in the bottom panel.
# Fix: p1 previously mapped length_cm2 as well (under the label "length_cm1"),
# so both panels showed the same histogram. It now plots length_cm, and the
# legend label matches the column name.
# NOTE(review): rows where length_cm is NA will be dropped from p1 with a
# ggplot2 warning; that difference from p2 is what this comparison shows.
p1 <- ggplot(hlcodL3) +
  geom_histogram(aes(length_cm, fill = "length_cm"), alpha = 0.5)

p2 <- ggplot(hlcodL3) +
  geom_histogram(aes(length_cm2, fill = "length_cm2"), alpha = 0.5)

# Stack the two panels vertically (patchwork)
p1/p2



# Copy the CPUE columns to their final names and convert standardized CPUE to
# density by dividing by the standard swept area. The calculation is identical
# for every species, so it is defined once and applied to each table.
#
# d:              a data frame with columns CPUEst_kg, CPUEst, CPUEun_kg and
#                 CPUEun.
# swept_area_km2: area used as the divisor; defaults to 0.45, the value used
#                 throughout this script. Assumes d has no column with this
#                 name (it would take precedence inside mutate()).
# Returns d with the columns cpue_kg, cpue, cpue_kg_un, cpue_un, density and
# density_ab added (or overwritten).
cpue_to_density <- function(d, swept_area_km2 = 0.45) {
  d %>%
    mutate(cpue_kg = CPUEst_kg,
           cpue = CPUEst,
           cpue_kg_un = CPUEun_kg,
           cpue_un = CPUEun,
           density = cpue_kg / swept_area_km2,
           density_ab = cpue / swept_area_km2)
}

# Cod
hlcodhaul <- cpue_to_density(hlcodL3)

# Flounder
hlflehaul <- cpue_to_density(hlfleL3)

# Plaice
hlplahaul <- cpue_to_density(hlplaL3)

# Dab
hldabhaul <- cpue_to_density(hldabL3)

# Start by inspecting one example haul to understand why a row with
# length = 0 and density = 0 appears alongside rows with other lengths
as.data.frame(filter(hlcodhaul, haul.id == "1993:1:GFR:SOL:H20:23:31"))
#>   RecordType Survey Quarter Country Ship Gear SweepLngt GearEx DoorType StNo
#> 1         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   23
#> 2         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   23
#> 3         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   23
#> 4         HH   BITS       1     GFR 06S1  H20        NA   <NA>       NA   23
#>   HaulNo Year SpecCodeType SpecCode   SpecVal  Sex TotalNo CatIdentifier NoMeas
#> 1     31 1993            W   126436         1 <NA>       6             1      3
#> 2     31 1993            W   126436         1 <NA>       6             1      3
#> 3     31 1993            W   126436         1 <NA>       6             1      3
#> 4     31 1993         <NA>       NA zeroCatch <NA>      NA            NA     NA
#>   SubFactor SubWgt CatCatchWgt LngtCode LngtClass HLNoAtLngt DevStage
#> 1         1     NA          25        1         6          2     <NA>
#> 2         1     NA          25        1        35          2     <NA>
#> 3         1     NA          25        1        39          2     <NA>
#> 4        NA     NA          NA     <NA>        NA         NA     <NA>
#>   LenMeasType DateofCalculation Valid_Aphia Ship2 Ship3
#> 1          NA          20211203      126436   SOL   SOL
#> 2          NA          20211203      126436   SOL   SOL
#> 3          NA          20211203      126436   SOL   SOL
#> 4          NA          20220301          NA   SOL   SOL
#>                         IDx sub_div Rect HaulVal StdSpecRecCode BySpecRecCode
#> 1 1993.1.GFR.06S1.H20.23.31      24 38G4       V              1             1
#> 2 1993.1.GFR.06S1.H20.23.31      24 38G4       V              1             1
#> 3 1993.1.GFR.06S1.H20.23.31      24 38G4       V              1             1
#> 4 1993.1.GFR.06S1.H20.23.31      24 38G4       V              1             1
#>   Fishing.line Month DataType HaulDur GroundSpeed                  haul.id
#> 1           36     2        C      30           4 1993:1:GFR:SOL:H20:23:31
#> 2           36     2        C      30           4 1993:1:GFR:SOL:H20:23:31
#> 3           36     2        C      30           4 1993:1:GFR:SOL:H20:23:31
#> 4           36     2        C      30           4 1993:1:GFR:SOL:H20:23:31
#>   ShootLat ShootLong      id_haul_stomach      Species CPUEun           a
#> 1  54.5167     14.15 1993.1.2.GFR.38G4.31 Gadus morhua      2 0.007244481
#> 2  54.5167     14.15 1993.1.2.GFR.38G4.31 Gadus morhua      2 0.007244481
#> 3  54.5167     14.15 1993.1.2.GFR.38G4.31 Gadus morhua      2 0.007244481
#> 4  54.5167     14.15                 <NA> Gadus morhua      0 0.007244481
#>        b length_cm length_cm2   weight_kg   CPUEun_kg RSA_sweep RS_sweep RS_x
#> 1 3.1008         6          6 0.001874558 0.003749116      1.47     0.75 0.75
#> 2 3.1008        35         35 0.444481726 0.888963452      1.47     0.75 0.75
#> 3 3.1008        39         39 0.621699998 1.243399996      1.47     0.75 0.75
#> 4 3.1008        NA          0 0.000000000 0.000000000      1.47     0.75 0.75
#>   Horizontal.opening..m. Swep.one.side..after.formula...meter Size.final..m
#> 1                  24.12                                   NA            NA
#> 2                  24.12                                   NA            NA
#> 3                  24.12                                   NA            NA
#> 4                  24.12                                   NA            NA
#>   Swept.area      RSA_x   RS  RSA  RSRSA CPUEst_kg CPUEst   cpue_kg  cpue
#> 1         NA -0.3333333 0.75 1.47 1.1025 0.0041334  2.205 0.0041334 2.205
#> 2         NA -0.3333333 0.75 1.47 1.1025 0.9800822  2.205 0.9800822 2.205
#> 3         NA -0.3333333 0.75 1.47 1.1025 1.3708485  2.205 1.3708485 2.205
#> 4         NA -0.3333333 0.75 1.47 1.1025 0.0000000  0.000 0.0000000 0.000
#>    cpue_kg_un cpue_un     density density_ab
#> 1 0.003749116       2 0.009185334        4.9
#> 2 0.888963452       2 2.177960458        4.9
#> 3 1.243399996       2 3.046329990        4.9
#> 4 0.000000000       0 0.000000000        0.0

# Show the first 50 rows belonging to hauls that contain a zero row
# (CPUEun == 0) together with at least one other distinct CPUEun value
hlcodhaul %>%
  group_by(haul.id) %>%
  mutate(no_catches = n_distinct(CPUEun)) %>%
  filter(any(CPUEun == 0)) %>%
  filter(no_catches > 1) %>%
  as.data.frame() %>%
  head(n = 50)
#>    RecordType Survey Quarter Country Ship Gear SweepLngt GearEx DoorType StNo
#> 1          HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 2          HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 3          HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 4          HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 5          HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 6          HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 7          HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 8          HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 9          HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 10         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 11         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 12         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 13         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 14         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 15         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 16         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 17         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 18         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 19         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 20         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 21         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 22         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   21
#> 23         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   22
#> 24         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   22
#> 25         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   23
#> 26         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   23
#> 27         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   23
#> 28         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   24
#> 29         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   24
#> 30         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   24
#> 31         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   24
#> 32         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   24
#> 33         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 34         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 35         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 36         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 37         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 38         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 39         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 40         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 41         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 42         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 43         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 44         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 45         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 46         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 47         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 48         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 49         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#> 50         HL   BITS       1     GFR 06S1  H20        NA   <NA>       NA   25
#>    HaulNo Year SpecCodeType SpecCode SpecVal  Sex TotalNo CatIdentifier NoMeas
#> 1       1 1993            W   126436       1 <NA>     110             1     55
#> 2       1 1993            W   126436       1 <NA>     110             1     55
#> 3       1 1993            W   126436       1 <NA>     110             1     55
#> 4       1 1993            W   126436       1 <NA>     110             1     55
#> 5       1 1993            W   126436       1 <NA>     110             1     55
#> 6       1 1993            W   126436       1 <NA>     110             1     55
#> 7       1 1993            W   126436       1 <NA>     110             1     55
#> 8       1 1993            W   126436       1 <NA>     110             1     55
#> 9       1 1993            W   126436       1 <NA>     110             1     55
#> 10      1 1993            W   126436       1 <NA>     110             1     55
#> 11      1 1993            W   126436       1 <NA>     110             1     55
#> 12      1 1993            W   126436       1 <NA>     110             1     55
#> 13      1 1993            W   126436       1 <NA>     110             1     55
#> 14      1 1993            W   126436       1 <NA>     110             1     55
#> 15      1 1993            W   126436       1 <NA>     110             1     55
#> 16      1 1993            W   126436       1 <NA>     110             1     55
#> 17      1 1993            W   126436       1 <NA>     110             1     55
#> 18      1 1993            W   126436       1 <NA>     110             1     55
#> 19      1 1993            W   126436       1 <NA>     110             1     55
#> 20      1 1993            W   126436       1 <NA>     110             1     55
#> 21      1 1993            W   126436       1 <NA>     110             1     55
#> 22      1 1993            W   126436       1 <NA>     110             1     55
#> 23     32 1993            W   126436       1 <NA>       4             1      2
#> 24     32 1993            W   126436       1 <NA>       4             1      2
#> 25     31 1993            W   126436       1 <NA>       6             1      3
#> 26     31 1993            W   126436       1 <NA>       6             1      3
#> 27     31 1993            W   126436       1 <NA>       6             1      3
#> 28     30 1993            W   126436       1 <NA>      10             1      5
#> 29     30 1993            W   126436       1 <NA>      10             1      5
#> 30     30 1993            W   126436       1 <NA>      10             1      5
#> 31     30 1993            W   126436       1 <NA>      10             1      5
#> 32     30 1993            W   126436       1 <NA>      10             1      5
#> 33      2 1993            W   126436       1 <NA>     998             1    153
#> 34      2 1993            W   126436       1 <NA>     998             1    153
#> 35      2 1993            W   126436       1 <NA>     998             1    153
#> 36      2 1993            W   126436       1 <NA>     998             1    153
#> 37      2 1993            W   126436       1 <NA>     998             1    153
#> 38      2 1993            W   126436       1 <NA>     998             1    153
#> 39      2 1993            W   126436       1 <NA>     998             1    153
#> 40      2 1993            W   126436       1 <NA>     998             1    153
#> 41      2 1993            W   126436       1 <NA>     998             1    153
#> 42      2 1993            W   126436       1 <NA>     998             1    153
#> 43      2 1993            W   126436       1 <NA>     998             1    153
#> 44      2 1993            W   126436       1 <NA>     998             1    153
#> 45      2 1993            W   126436       1 <NA>     998             1    153
#> 46      2 1993            W   126436       1 <NA>     998             1    153
#> 47      2 1993            W   126436       1 <NA>     998             1    153
#> 48      2 1993            W   126436       1 <NA>     998             1    153
#> 49      2 1993            W   126436       1 <NA>     998             1    153
#> 50      2 1993            W   126436       1 <NA>     998             1    153
#>    SubFactor SubWgt CatCatchWgt LngtCode LngtClass HLNoAtLngt DevStage
#> 1          1     NA         680        1        14          4     <NA>
#> 2          1     NA         680        1        16          4     <NA>
#> 3          1     NA         680        1        18          4     <NA>
#> 4          1     NA         680        1        27          8     <NA>
#> 5          1     NA         680        1        28          8     <NA>
#> 6          1     NA         680        1        29          8     <NA>
#> 7          1     NA         680        1        58          2     <NA>
#> 8          1     NA         680        1        59          2     <NA>
#> 9          1     NA         680        1        60          2     <NA>
#> 10         1     NA         680        1        65          2     <NA>
#> 11         1     NA         680        1        37          2     <NA>
#> 12         1     NA         680        1        44          4     <NA>
#> 13         1     NA         680        1        48          2     <NA>
#> 14         1     NA         680        1        49          2     <NA>
#> 15         1     NA         680        1        50          4     <NA>
#> 16         1     NA         680        1        53          2     <NA>
#> 17         1     NA         680        1        30          8     <NA>
#> 18         1     NA         680        1        31         10     <NA>
#> 19         1     NA         680        1        32          8     <NA>
#> 20         1     NA         680        1        33         14     <NA>
#> 21         1     NA         680        1        34          6     <NA>
#> 22         1     NA         680        1        35          4     <NA>
#> 23         1     NA          41        1        42          2     <NA>
#> 24         1     NA          41        1        47          2     <NA>
#> 25         1     NA          25        1         6          2     <NA>
#> 26         1     NA          25        1        35          2     <NA>
#> 27         1     NA          25        1        39          2     <NA>
#> 28         1     NA          78        1        33          2     <NA>
#> 29         1     NA          78        1        35          2     <NA>
#> 30         1     NA          78        1        37          2     <NA>
#> 31         1     NA          78        1        44          2     <NA>
#> 32         1     NA          78        1        52          2     <NA>
#> 33         1     NA        3260        1        20          7     <NA>
#> 34         1     NA        3260        1        21         13     <NA>
#> 35         1     NA        3260        1        24         13     <NA>
#> 36         1     NA        3260        1        25         46     <NA>
#> 37         1     NA        3260        1        26         46     <NA>
#> 38         1     NA        3260        1        27        130     <NA>
#> 39         1     NA        3260        1        41          7     <NA>
#> 40         1     NA        3260        1        46         13     <NA>
#> 41         1     NA        3260        1        47          7     <NA>
#> 42         1     NA        3260        1        49          7     <NA>
#> 43         1     NA        3260        1        34         52     <NA>
#> 44         1     NA        3260        1        35         26     <NA>
#> 45         1     NA        3260        1        36         20     <NA>
#> 46         1     NA        3260        1        37          7     <NA>
#> 47         1     NA        3260        1        38         13     <NA>
#> 48         1     NA        3260        1        39          7     <NA>
#> 49         1     NA        3260        1        28        130     <NA>
#> 50         1     NA        3260        1        29        117     <NA>
#>    LenMeasType DateofCalculation Valid_Aphia Ship2 Ship3
#> 1           NA          20211203      126436   SOL   SOL
#> 2           NA          20211203      126436   SOL   SOL
#> 3           NA          20211203      126436   SOL   SOL
#> 4           NA          20211203      126436   SOL   SOL
#> 5           NA          20211203      126436   SOL   SOL
#> 6           NA          20211203      126436   SOL   SOL
#> 7           NA          20211203      126436   SOL   SOL
#> 8           NA          20211203      126436   SOL   SOL
#> 9           NA          20211203      126436   SOL   SOL
#> 10          NA          20211203      126436   SOL   SOL
#> 11          NA          20211203      126436   SOL   SOL
#> 12          NA          20211203      126436   SOL   SOL
#> 13          NA          20211203      126436   SOL   SOL
#> 14          NA          20211203      126436   SOL   SOL
#> 15          NA          20211203      126436   SOL   SOL
#> 16          NA          20211203      126436   SOL   SOL
#> 17          NA          20211203      126436   SOL   SOL
#> 18          NA          20211203      126436   SOL   SOL
#> 19          NA          20211203      126436   SOL   SOL
#> 20          NA          20211203      126436   SOL   SOL
#> 21          NA          20211203      126436   SOL   SOL
#> 22          NA          20211203      126436   SOL   SOL
#> 23          NA          20211203      126436   SOL   SOL
#> 24          NA          20211203      126436   SOL   SOL
#> 25          NA          20211203      126436   SOL   SOL
#> 26          NA          20211203      126436   SOL   SOL
#> 27          NA          20211203      126436   SOL   SOL
#> 28          NA          20211203      126436   SOL   SOL
#> 29          NA          20211203      126436   SOL   SOL
#> 30          NA          20211203      126436   SOL   SOL
#> 31          NA          20211203      126436   SOL   SOL
#> 32          NA          20211203      126436   SOL   SOL
#> 33          NA          20211203      126436   SOL   SOL
#> 34          NA          20211203      126436   SOL   SOL
#> 35          NA          20211203      126436   SOL   SOL
#> 36          NA          20211203      126436   SOL   SOL
#> 37          NA          20211203      126436   SOL   SOL
#> 38          NA          20211203      126436   SOL   SOL
#> 39          NA          20211203      126436   SOL   SOL
#> 40          NA          20211203      126436   SOL   SOL
#> 41          NA          20211203      126436   SOL   SOL
#> 42          NA          20211203      126436   SOL   SOL
#> 43          NA          20211203      126436   SOL   SOL
#> 44          NA          20211203      126436   SOL   SOL
#> 45          NA          20211203      126436   SOL   SOL
#> 46          NA          20211203      126436   SOL   SOL
#> 47          NA          20211203      126436   SOL   SOL
#> 48          NA          20211203      126436   SOL   SOL
#> 49          NA          20211203      126436   SOL   SOL
#> 50          NA          20211203      126436   SOL   SOL
#>                          IDx sub_div Rect HaulVal StdSpecRecCode BySpecRecCode
#> 1   1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 2   1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 3   1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 4   1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 5   1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 6   1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 7   1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 8   1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 9   1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 10  1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 11  1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 12  1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 13  1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 14  1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 15  1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 16  1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 17  1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 18  1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 19  1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 20  1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 21  1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 22  1993.1.GFR.06S1.H20.21.1      24 38G3       V              1             1
#> 23 1993.1.GFR.06S1.H20.22.32      24 38G4       V              1             1
#> 24 1993.1.GFR.06S1.H20.22.32      24 38G4       V              1             1
#> 25 1993.1.GFR.06S1.H20.23.31      24 38G4       V              1             1
#> 26 1993.1.GFR.06S1.H20.23.31      24 38G4       V              1             1
#> 27 1993.1.GFR.06S1.H20.23.31      24 38G4       V              1             1
#> 28 1993.1.GFR.06S1.H20.24.30      24 37G3       V              1             1
#> 29 1993.1.GFR.06S1.H20.24.30      24 37G3       V              1             1
#> 30 1993.1.GFR.06S1.H20.24.30      24 37G3       V              1             1
#> 31 1993.1.GFR.06S1.H20.24.30      24 37G3       V              1             1
#> 32 1993.1.GFR.06S1.H20.24.30      24 37G3       V              1             1
#> 33  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 34  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 35  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 36  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 37  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 38  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 39  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 40  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 41  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 42  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 43  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 44  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 45  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 46  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 47  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 48  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 49  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#> 50  1993.1.GFR.06S1.H20.25.2      24 38G3       V              1             1
#>    Fishing.line Month DataType HaulDur GroundSpeed                  haul.id
#> 1            36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 2            36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 3            36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 4            36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 5            36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 6            36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 7            36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 8            36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 9            36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 10           36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 11           36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 12           36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 13           36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 14           36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 15           36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 16           36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 17           36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 18           36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 19           36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 20           36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 21           36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 22           36     2        C      30         3.8  1993:1:GFR:SOL:H20:21:1
#> 23           36     2        C      30         3.6 1993:1:GFR:SOL:H20:22:32
#> 24           36     2        C      30         3.6 1993:1:GFR:SOL:H20:22:32
#> 25           36     2        C      30         4.0 1993:1:GFR:SOL:H20:23:31
#> 26           36     2        C      30         4.0 1993:1:GFR:SOL:H20:23:31
#> 27           36     2        C      30         4.0 1993:1:GFR:SOL:H20:23:31
#> 28           36     2        C      30         3.8 1993:1:GFR:SOL:H20:24:30
#> 29           36     2        C      30         3.8 1993:1:GFR:SOL:H20:24:30
#> 30           36     2        C      30         3.8 1993:1:GFR:SOL:H20:24:30
#> 31           36     2        C      30         3.8 1993:1:GFR:SOL:H20:24:30
#> 32           36     2        C      30         3.8 1993:1:GFR:SOL:H20:24:30
#> 33           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 34           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 35           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 36           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 37           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 38           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 39           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 40           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 41           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 42           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 43           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 44           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 45           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 46           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 47           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 48           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 49           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#> 50           36     2        C      30         3.6  1993:1:GFR:SOL:H20:25:2
#>    ShootLat ShootLong      id_haul_stomach      Species CPUEun           a
#> 1   54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      4 0.007244481
#> 2   54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      4 0.007244481
#> 3   54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      4 0.007244481
#> 4   54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      8 0.007244481
#> 5   54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      8 0.007244481
#> 6   54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      8 0.007244481
#> 7   54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      2 0.007244481
#> 8   54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      2 0.007244481
#> 9   54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      2 0.007244481
#> 10  54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      2 0.007244481
#> 11  54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      2 0.007244481
#> 12  54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      4 0.007244481
#> 13  54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      2 0.007244481
#> 14  54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      2 0.007244481
#> 15  54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      4 0.007244481
#> 16  54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      2 0.007244481
#> 17  54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      8 0.007244481
#> 18  54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua     10 0.007244481
#> 19  54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      8 0.007244481
#> 20  54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua     14 0.007244481
#> 21  54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      6 0.007244481
#> 22  54.6833   13.0000  1993.1.2.GFR.38G3.1 Gadus morhua      4 0.007244481
#> 23  54.5167   14.2667 1993.1.2.GFR.38G4.32 Gadus morhua      2 0.007244481
#> 24  54.5167   14.2667 1993.1.2.GFR.38G4.32 Gadus morhua      2 0.007244481
#> 25  54.5167   14.1500 1993.1.2.GFR.38G4.31 Gadus morhua      2 0.007244481
#> 26  54.5167   14.1500 1993.1.2.GFR.38G4.31 Gadus morhua      2 0.007244481
#> 27  54.5167   14.1500 1993.1.2.GFR.38G4.31 Gadus morhua      2 0.007244481
#> 28  54.4833   13.9833 1993.1.2.GFR.37G3.30 Gadus morhua      2 0.007244481
#> 29  54.4833   13.9833 1993.1.2.GFR.37G3.30 Gadus morhua      2 0.007244481
#> 30  54.4833   13.9833 1993.1.2.GFR.37G3.30 Gadus morhua      2 0.007244481
#> 31  54.4833   13.9833 1993.1.2.GFR.37G3.30 Gadus morhua      2 0.007244481
#> 32  54.4833   13.9833 1993.1.2.GFR.37G3.30 Gadus morhua      2 0.007244481
#> 33  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua      7 0.007244481
#> 34  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua     13 0.007244481
#> 35  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua     13 0.007244481
#> 36  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua     46 0.007244481
#> 37  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua     46 0.007244481
#> 38  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua    130 0.007244481
#> 39  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua      7 0.007244481
#> 40  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua     13 0.007244481
#> 41  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua      7 0.007244481
#> 42  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua      7 0.007244481
#> 43  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua     52 0.007244481
#> 44  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua     26 0.007244481
#> 45  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua     20 0.007244481
#> 46  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua      7 0.007244481
#> 47  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua     13 0.007244481
#> 48  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua      7 0.007244481
#> 49  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua    130 0.007244481
#> 50  54.7333   13.0833  1993.1.2.GFR.38G3.2 Gadus morhua    117 0.007244481
#>         b length_cm length_cm2   weight_kg    CPUEun_kg RSA_sweep RS_sweep
#> 1  3.1008        14         14 0.025937100  0.103748400      1.47     0.79
#> 2  3.1008        16         16 0.039241253  0.156965010      1.47     0.79
#> 3  3.1008        18         18 0.056540106  0.226160424      1.47     0.79
#> 4  3.1008        27         27 0.198783552  1.590268412      1.47     0.79
#> 5  3.1008        28         28 0.222512961  1.780103692      1.47     0.79
#> 6  3.1008        29         29 0.248091227  1.984729814      1.47     0.79
#> 7  3.1008        58         58 2.128361064  4.256722129      1.47     0.79
#> 8  3.1008        59         59 2.244221418  4.488442837      1.47     0.79
#> 9  3.1008        60         60 2.364281503  4.728563006      1.47     0.79
#> 10 3.1008        65         65 3.030327268  6.060654536      1.47     0.79
#> 11 3.1008        37         37 0.528065302  1.056130604      1.47     0.79
#> 12 3.1008        44         44 0.903703173  3.614812692      1.47     0.79
#> 13 3.1008        48         48 1.183588094  2.367176188      1.47     0.79
#> 14 3.1008        49         49 1.261733885  2.523467769      1.47     0.79
#> 15 3.1008        50         50 1.343302904  5.373211615      1.47     0.79
#> 16 3.1008        53         53 1.609319924  3.218639848      1.47     0.79
#> 17 3.1008        30         30 0.275591162  2.204729295      1.47     0.79
#> 18 3.1008        31         31 0.305085835  3.050858346      1.47     0.79
#> 19 3.1008        32         32 0.336648559  2.693188473      1.47     0.79
#> 20 3.1008        33         33 0.370352888  5.184940437      1.47     0.79
#> 21 3.1008        34         34 0.406272607  2.437635642      1.47     0.79
#> 22 3.1008        35         35 0.444481726  1.777926905      1.47     0.79
#> 23 3.1008        42         42 0.782310468  1.564620936      1.47     0.83
#> 24 3.1008        47         47 1.108788707  2.217577415      1.47     0.83
#> 25 3.1008         6          6 0.001874558  0.003749116      1.47     0.75
#> 26 3.1008        35         35 0.444481726  0.888963452      1.47     0.75
#> 27 3.1008        39         39 0.621699998  1.243399996      1.47     0.75
#> 28 3.1008        33         33 0.370352888  0.740705777      1.47     0.79
#> 29 3.1008        35         35 0.444481726  0.888963452      1.47     0.79
#> 30 3.1008        37         37 0.528065302  1.056130604      1.47     0.79
#> 31 3.1008        44         44 0.903703173  1.807406346      1.47     0.79
#> 32 3.1008        52         52 1.517018710  3.034037419      1.47     0.79
#> 33 3.1008        20         20 0.078386533  0.548705733      1.47     0.83
#> 34 3.1008        21         21 0.091189586  1.185464615      1.47     0.83
#> 35 3.1008        24         24 0.137964290  1.793535765      1.47     0.83
#> 36 3.1008        25         25 0.156581358  7.202742459      1.47     0.83
#> 37 3.1008        26         26 0.176830444  8.134200440      1.47     0.83
#> 38 3.1008        27         27 0.198783552 25.841861699      1.47     0.83
#> 39 3.1008        41         41 0.725985458  5.081898208      1.47     0.83
#> 40 3.1008        46         46 1.037259063 13.484367814      1.47     0.83
#> 41 3.1008        47         47 1.108788707  7.761520951      1.47     0.83
#> 42 3.1008        49         49 1.261733885  8.832137193      1.47     0.83
#> 43 3.1008        34         34 0.406272607 21.126175567      1.47     0.83
#> 44 3.1008        35         35 0.444481726 11.556524879      1.47     0.83
#> 45 3.1008        36         36 0.485054476  9.701089523      1.47     0.83
#> 46 3.1008        37         37 0.528065302  3.696457113      1.47     0.83
#> 47 3.1008        38         38 0.573588857  7.456655137      1.47     0.83
#> 48 3.1008        39         39 0.621699998  4.351899986      1.47     0.83
#> 49 3.1008        28         28 0.222512961 28.926684992      1.47     0.83
#> 50 3.1008        29         29 0.248091227 29.026673534      1.47     0.83
#>         RS_x Horizontal.opening..m. Swep.one.side..after.formula...meter
#> 1  0.7894737                  24.12                                   NA
#> 2  0.7894737                  24.12                                   NA
#> 3  0.7894737                  24.12                                   NA
#> 4  0.7894737                  24.12                                   NA
#> 5  0.7894737                  24.12                                   NA
#> 6  0.7894737                  24.12                                   NA
#> 7  0.7894737                  24.12                                   NA
#> 8  0.7894737                  24.12                                   NA
#> 9  0.7894737                  24.12                                   NA
#> 10 0.7894737                  24.12                                   NA
#> 11 0.7894737                  24.12                                   NA
#> 12 0.7894737                  24.12                                   NA
#> 13 0.7894737                  24.12                                   NA
#> 14 0.7894737                  24.12                                   NA
#> 15 0.7894737                  24.12                                   NA
#> 16 0.7894737                  24.12                                   NA
#> 17 0.7894737                  24.12                                   NA
#> 18 0.7894737                  24.12                                   NA
#> 19 0.7894737                  24.12                                   NA
#> 20 0.7894737                  24.12                                   NA
#> 21 0.7894737                  24.12                                   NA
#> 22 0.7894737                  24.12                                   NA
#> 23 0.8333333                  24.12                                   NA
#> 24 0.8333333                  24.12                                   NA
#> 25 0.7500000                  24.12                                   NA
#> 26 0.7500000                  24.12                                   NA
#> 27 0.7500000                  24.12                                   NA
#> 28 0.7894737                  24.12                                   NA
#> 29 0.7894737                  24.12                                   NA
#> 30 0.7894737                  24.12                                   NA
#> 31 0.7894737                  24.12                                   NA
#> 32 0.7894737                  24.12                                   NA
#> 33 0.8333333                  24.12                                   NA
#> 34 0.8333333                  24.12                                   NA
#> 35 0.8333333                  24.12                                   NA
#> 36 0.8333333                  24.12                                   NA
#> 37 0.8333333                  24.12                                   NA
#> 38 0.8333333                  24.12                                   NA
#> 39 0.8333333                  24.12                                   NA
#> 40 0.8333333                  24.12                                   NA
#> 41 0.8333333                  24.12                                   NA
#> 42 0.8333333                  24.12                                   NA
#> 43 0.8333333                  24.12                                   NA
#> 44 0.8333333                  24.12                                   NA
#> 45 0.8333333                  24.12                                   NA
#> 46 0.8333333                  24.12                                   NA
#> 47 0.8333333                  24.12                                   NA
#> 48 0.8333333                  24.12                                   NA
#> 49 0.8333333                  24.12                                   NA
#> 50 0.8333333                  24.12                                   NA
#>    Size.final..m Swept.area      RSA_x   RS  RSA  RSRSA  CPUEst_kg   CPUEst
#> 1             NA         NA -0.3333333 0.79 1.47 1.1613  0.1204830   4.6452
#> 2             NA         NA -0.3333333 0.79 1.47 1.1613  0.1822835   4.6452
#> 3             NA         NA -0.3333333 0.79 1.47 1.1613  0.2626401   4.6452
#> 4             NA         NA -0.3333333 0.79 1.47 1.1613  1.8467787   9.2904
#> 5             NA         NA -0.3333333 0.79 1.47 1.1613  2.0672344   9.2904
#> 6             NA         NA -0.3333333 0.79 1.47 1.1613  2.3048667   9.2904
#> 7             NA         NA -0.3333333 0.79 1.47 1.1613  4.9433314   2.3226
#> 8             NA         NA -0.3333333 0.79 1.47 1.1613  5.2124287   2.3226
#> 9             NA         NA -0.3333333 0.79 1.47 1.1613  5.4912802   2.3226
#> 10            NA         NA -0.3333333 0.79 1.47 1.1613  7.0382381   2.3226
#> 11            NA         NA -0.3333333 0.79 1.47 1.1613  1.2264845   2.3226
#> 12            NA         NA -0.3333333 0.79 1.47 1.1613  4.1978820   4.6452
#> 13            NA         NA -0.3333333 0.79 1.47 1.1613  2.7490017   2.3226
#> 14            NA         NA -0.3333333 0.79 1.47 1.1613  2.9305031   2.3226
#> 15            NA         NA -0.3333333 0.79 1.47 1.1613  6.2399106   4.6452
#> 16            NA         NA -0.3333333 0.79 1.47 1.1613  3.7378065   2.3226
#> 17            NA         NA -0.3333333 0.79 1.47 1.1613  2.5603521   9.2904
#> 18            NA         NA -0.3333333 0.79 1.47 1.1613  3.5429618  11.6130
#> 19            NA         NA -0.3333333 0.79 1.47 1.1613  3.1275998   9.2904
#> 20            NA         NA -0.3333333 0.79 1.47 1.1613  6.0212713  16.2582
#> 21            NA         NA -0.3333333 0.79 1.47 1.1613  2.8308263   6.9678
#> 22            NA         NA -0.3333333 0.79 1.47 1.1613  2.0647065   4.6452
#> 23            NA         NA -0.3333333 0.83 1.47 1.2201  1.9089940   2.4402
#> 24            NA         NA -0.3333333 0.83 1.47 1.2201  2.7056662   2.4402
#> 25            NA         NA -0.3333333 0.75 1.47 1.1025  0.0041334   2.2050
#> 26            NA         NA -0.3333333 0.75 1.47 1.1025  0.9800822   2.2050
#> 27            NA         NA -0.3333333 0.75 1.47 1.1025  1.3708485   2.2050
#> 28            NA         NA -0.3333333 0.79 1.47 1.1613  0.8601816   2.3226
#> 29            NA         NA -0.3333333 0.79 1.47 1.1613  1.0323533   2.3226
#> 30            NA         NA -0.3333333 0.79 1.47 1.1613  1.2264845   2.3226
#> 31            NA         NA -0.3333333 0.79 1.47 1.1613  2.0989410   2.3226
#> 32            NA         NA -0.3333333 0.79 1.47 1.1613  3.5234277   2.3226
#> 33            NA         NA -0.3333333 0.83 1.47 1.2201  0.6694759   8.5407
#> 34            NA         NA -0.3333333 0.83 1.47 1.2201  1.4463854  15.8613
#> 35            NA         NA -0.3333333 0.83 1.47 1.2201  2.1882930  15.8613
#> 36            NA         NA -0.3333333 0.83 1.47 1.2201  8.7880661  56.1246
#> 37            NA         NA -0.3333333 0.83 1.47 1.2201  9.9245380  56.1246
#> 38            NA         NA -0.3333333 0.83 1.47 1.2201 31.5296555 158.6130
#> 39            NA         NA -0.3333333 0.83 1.47 1.2201  6.2004240   8.5407
#> 40            NA         NA -0.3333333 0.83 1.47 1.2201 16.4522772  15.8613
#> 41            NA         NA -0.3333333 0.83 1.47 1.2201  9.4698317   8.5407
#> 42            NA         NA -0.3333333 0.83 1.47 1.2201 10.7760906   8.5407
#> 43            NA         NA -0.3333333 0.83 1.47 1.2201 25.7760468  63.4452
#> 44            NA         NA -0.3333333 0.83 1.47 1.2201 14.1001160  31.7226
#> 45            NA         NA -0.3333333 0.83 1.47 1.2201 11.8362993  24.4020
#> 46            NA         NA -0.3333333 0.83 1.47 1.2201  4.5100473   8.5407
#> 47            NA         NA -0.3333333 0.83 1.47 1.2201  9.0978649  15.8613
#> 48            NA         NA -0.3333333 0.83 1.47 1.2201  5.3097532   8.5407
#> 49            NA         NA -0.3333333 0.83 1.47 1.2201 35.2934484 158.6130
#> 50            NA         NA -0.3333333 0.83 1.47 1.2201 35.4154444 142.7517
#>       cpue_kg     cpue   cpue_kg_un cpue_un      density density_ab no_catches
#> 1   0.1204830   4.6452  0.103748400       4  0.267740037  10.322667          7
#> 2   0.1822835   4.6452  0.156965010       4  0.405074370  10.322667          7
#> 3   0.2626401   4.6452  0.226160424       4  0.583644667  10.322667          7
#> 4   1.8467787   9.2904  1.590268412       8  4.103952683  20.645333          7
#> 5   2.0672344   9.2904  1.780103692       8  4.593854261  20.645333          7
#> 6   2.3048667   9.2904  1.984729814       8  5.121926074  20.645333          7
#> 7   4.9433314   2.3226  4.256722129       2 10.985180907   5.161333          7
#> 8   5.2124287   2.3226  4.488442837       2 11.583174815   5.161333          7
#> 9   5.4912802   2.3226  4.728563006       2 12.202844931   5.161333          7
#> 10  7.0382381   2.3226  6.060654536       2 15.640529140   5.161333          7
#> 11  1.2264845   2.3226  1.056130604       2  2.725521045   5.161333          7
#> 12  4.1978820   4.6452  3.614812692       4  9.328626619  10.322667          7
#> 13  2.7490017   2.3226  2.367176188       2  6.108892682   5.161333          7
#> 14  2.9305031   2.3226  2.523467769       2  6.512229157   5.161333          7
#> 15  6.2399106   4.6452  5.373211615       4 13.866468108  10.322667          7
#> 16  3.7378065   2.3226  3.218639848       2  8.306236567   5.161333          7
#> 17  2.5603521   9.2904  2.204729295       8  5.689671400  20.645333          7
#> 18  3.5429618  11.6130  3.050858346      10  7.873248439  25.806667          7
#> 19  3.1275998   9.2904  2.693188473       8  6.950221721  20.645333          7
#> 20  6.0212713  16.2582  5.184940437      14 13.380602953  36.129333          7
#> 21  2.8308263   6.9678  2.437635642       6  6.290725048  15.484000          7
#> 22  2.0647065   4.6452  1.777926905       4  4.588236698  10.322667          7
#> 23  1.9089940   2.4402  1.564620936       2  4.242208898   5.422667          2
#> 24  2.7056662   2.4402  2.217577415       2  6.012591564   5.422667          2
#> 25  0.0041334   2.2050  0.003749116       2  0.009185334   4.900000          2
#> 26  0.9800822   2.2050  0.888963452       2  2.177960458   4.900000          2
#> 27  1.3708485   2.2050  1.243399996       2  3.046329990   4.900000          2
#> 28  0.8601816   2.3226  0.740705777       2  1.911514708   5.161333          2
#> 29  1.0323533   2.3226  0.888963452       2  2.294118349   5.161333          2
#> 30  1.2264845   2.3226  1.056130604       2  2.725521045   5.161333          2
#> 31  2.0989410   2.3226  1.807406346       2  4.664313310   5.161333          2
#> 32  3.5234277   2.3226  3.034037419       2  7.829839233   5.161333          2
#> 33  0.6694759   8.5407  0.548705733       7  1.487724145  18.979333         12
#> 34  1.4463854  15.8613  1.185464615      13  3.214189725  35.247333         12
#> 35  2.1882930  15.8613  1.793535765      13  4.862873303  35.247333         12
#> 36  8.7880661  56.1246  7.202742459      46 19.529035721 124.721333         12
#> 37  9.9245380  56.1246  8.134200440      46 22.054528794 124.721333         12
#> 38 31.5296555 158.6130 25.841861699     130 70.065901020 352.473333         12
#> 39  6.2004240   8.5407  5.081898208       7 13.778720009  18.979333         12
#> 40 16.4522772  15.8613 13.484367814      13 36.560615933  35.247333         12
#> 41  9.4698317   8.5407  7.761520951       7 21.044070472  18.979333         12
#> 42 10.7760906   8.5407  8.832137193       7 23.946867977  18.979333         12
#> 43 25.7760468  63.4452 21.126175567      52 57.280104019 140.989333         12
#> 44 14.1001160  31.7226 11.556524879      26 31.333591123  70.494667         12
#> 45 11.8362993  24.4020  9.701089523      20 26.302887394  54.226667         12
#> 46  4.5100473   8.5407  3.696457113       7 10.022327386  18.979333         12
#> 47  9.0978649  15.8613  7.456655137      13 20.217477629  35.247333         12
#> 48  5.3097532   8.5407  4.351899986       7 11.799451495  18.979333         12
#> 49 35.2934484 158.6130 28.926684992     130 78.429885241 352.473333         12
#> 50 35.4154444 142.7517 29.026673534     117 78.700987509 317.226000         12

# Sanity check: within each haul keep the row(s) holding the lowest CPUEun,
# then list the distinct values of those per-haul minima across all hauls.
hlcodhaul %>% 
  group_by(haul.id) %>% 
  filter(CPUEun == min(CPUEun)) %>% 
  ungroup() %>% 
  distinct(CPUEun)
#> # A tibble: 1 × 1
#>   CPUEun
#>    <dbl>
#> 1      0

# The minimum CPUE is zero in every haul at this stage (0 is the only distinct minimum above). This doesn't really matter, because I calculate haul-level CPUE by grouping by haul ID and summing.

# Rename columns and select specific columns from the species data.
# All four species go through exactly the same steps, so the pipeline lives in
# one helper instead of four copies.
#
# hl_haul:       haul-by-length catch data for one species (e.g. hlcodhaul)
# species_label: string written to the new `species` column
# Returns the selected columns, with Year/ShootLat/ShootLong/Quarter/Rect
# renamed to year/lat/lon/quarter/ices_rect and `length_cm2` renamed to
# `length_cm`.
select_catch_columns <- function(hl_haul, species_label) {
  hl_haul %>%
    # dplyr::select is called explicitly, as elsewhere in this script
    dplyr::select(density, Year, ShootLat, ShootLong, Quarter, Country, Month,
                  haul.id, IDx, Rect, sub_div, length_cm2) %>%
    rename(year = Year,
           lat = ShootLat,
           lon = ShootLong,
           quarter = Quarter,
           ices_rect = Rect,
           length_cm = length_cm2) %>%
    mutate(species = species_label)
}

# Cod
datcod <- select_catch_columns(hlcodhaul, "cod")

# Flounder
datfle <- select_catch_columns(hlflehaul, "flounder")

# Plaice
datpla <- select_catch_columns(hlplahaul, "plaice")

# Dab
datdab <- select_catch_columns(hldabhaul, "dab")
# CPUE is size-based, so I want the data frame to be "full": every haul should
# have a row for every length class, also when nothing was caught in it. Then I
# can define a size group, group by haul and sum, even for hauls without catches
# in that group. Right now a haul only has the lengths that were caught.
# Below: number of distinct lengths per haul (one example haul per count).
datcod %>%
  group_by(haul.id) %>%
  summarise(n_size = n_distinct(length_cm)) %>%
  distinct(n_size, .keep_all = TRUE)
#> group_by: one grouping variable (haul.id)
#> summarise: now 9,373 rows and 2 columns, ungrouped
#> distinct: removed 9,304 rows (99%), 69 rows remaining
#> # A tibble: 69 × 2
#>    haul.id                  n_size
#>    <chr>                     <int>
#>  1 1993:1:GFR:SOL:H20:21:1      23
#>  2 1993:1:GFR:SOL:H20:22:32      3
#>  3 1993:1:GFR:SOL:H20:23:31      4
#>  4 1993:1:GFR:SOL:H20:24:30      6
#>  5 1993:1:GFR:SOL:H20:26:3      31
#>  6 1993:1:GFR:SOL:H20:27:27     10
#>  7 1993:1:GFR:SOL:H20:28:24     36
#>  8 1993:1:GFR:SOL:H20:29:29      7
#>  9 1993:1:GFR:SOL:H20:30:28      2
#> 10 1993:1:GFR:SOL:H20:31:25     29
#> # … with 59 more rows
# Look at one example haul; print as a plain data frame so all columns show
datcod %>%
  filter(haul.id == "1993:1:GFR:SOL:H20:23:31") %>%
  as.data.frame()
#> filter: removed 219,786 rows (>99%), 4 rows remaining
#>       density year     lat   lon quarter Country Month                  haul.id
#> 1 0.009185334 1993 54.5167 14.15       1     GFR     2 1993:1:GFR:SOL:H20:23:31
#> 2 2.177960458 1993 54.5167 14.15       1     GFR     2 1993:1:GFR:SOL:H20:23:31
#> 3 3.046329990 1993 54.5167 14.15       1     GFR     2 1993:1:GFR:SOL:H20:23:31
#> 4 0.000000000 1993 54.5167 14.15       1     GFR     2 1993:1:GFR:SOL:H20:23:31
#>                         IDx ices_rect sub_div length_cm species
#> 1 1993.1.GFR.06S1.H20.23.31      38G4      24         6     cod
#> 2 1993.1.GFR.06S1.H20.23.31      38G4      24        35     cod
#> 3 1993.1.GFR.06S1.H20.23.31      38G4      24        39     cod
#> 4 1993.1.GFR.06S1.H20.23.31      38G4      24         0     cod
# Hauls whose summed cod density is zero (one row kept per haul)
datcod %>%
  group_by(haul.id) %>%
  mutate(tot_dens = sum(density)) %>%
  ungroup() %>%
  distinct(haul.id, .keep_all = TRUE) %>%
  filter(tot_dens == 0)
#> group_by: one grouping variable (haul.id)
#> mutate (grouped): new variable 'tot_dens' (double) with 8,230 unique values and 0% NA
#> ungroup: no grouping variables
#> distinct: removed 210,417 rows (96%), 9,373 rows remaining
#> filter: removed 8,239 rows (88%), 1,134 rows remaining
#> # A tibble: 1,134 × 14
#>    density  year   lat   lon quarter Country Month haul.id IDx   ices_…¹ sub_div
#>      <dbl> <int> <dbl> <dbl>   <int> <chr>   <int> <chr>   <chr> <chr>   <chr>  
#>  1       0  1993  55    17.5       1 GFR         2 1993:1… 1993… 39G7    25     
#>  2       0  1993  57.9  19.4       1 SWE         3 1993:1… 1993… 44G9    28     
#>  3       0  1993  57.1  17.9       3 SWE         8 1993:3… 1993… 43G7    27     
#>  4       0  1993  57.1  18.9       3 SWE         8 1993:3… 1993… 43G8    28     
#>  5       0  1993  57.1  18.8       3 SWE         8 1993:3… 1993… 43G8    28     
#>  6       0  1993  57.2  18.9       3 SWE         8 1993:3… 1993… 43G8    28     
#>  7       0  1993  57.3  17.9       3 SWE         8 1993:3… 1993… 43G7    27     
#>  8       0  1993  57.4  16.9       3 SWE         8 1993:3… 1993… 43G6    27     
#>  9       0  1993  57.4  19.2       3 SWE         8 1993:3… 1993… 43G9    28     
#> 10       0  1993  57.5  17.1       3 SWE         8 1993:3… 1993… 43G7    27     
#> # … with 1,124 more rows, 3 more variables: length_cm <dbl>, species <chr>,
#> #   tot_dens <dbl>, and abbreviated variable name ¹​ices_rect

# Create a data frame with all combinations of trawl IDs and lengths
# (lengths in steps of 1 over the observed range of length_cm).
# stringsAsFactors = FALSE keeps haul.id as character, so the join and
# bind_rows() further down do not depend on implicit factor -> character
# coercion; KEEP.OUT.ATTRS = FALSE returns a plain data frame without the
# "out.attrs" attribute.
ex_df <- expand.grid(
  length_cm = seq_range(datcod$length_cm, by = 1),
  haul.id = unique(datcod$haul.id),
  KEEP.OUT.ATTRS = FALSE,
  stringsAsFactors = FALSE
)

# Create an ID that is haul + length
ex_df$haul.id.size <- paste(ex_df$haul.id, ex_df$length_cm, sep = ".")
datcod$haul.id.size <- paste(datcod$haul.id, datcod$length_cm, sep = ".")

# Remove IDs that are already in datcod
ex_df <- ex_df %>% filter(!haul.id.size %in% unique(datcod$haul.id.size))
#> filter: removed 219,790 rows (18%), 979,954 rows remaining

# Add in the other columns besides density and length (one row per haul)
dat_for_join <- datcod %>%
  dplyr::select(-density, -length_cm, -haul.id.size) %>%
  distinct(haul.id, .keep_all = TRUE)
#> distinct: removed 210,417 rows (96%), 9,373 rows remaining

ex_df <- left_join(ex_df, dat_for_join, by = "haul.id")
#> left_join: added 10 columns (year, lat, lon, quarter, Country, …)
#>            > rows only in x         0
#>            > rows only in y  (      0)
#>            > matched rows     979,954
#>            >                 =========
#>            > rows total       979,954

# Sanity check: no haul-length ID should be in both datcod and ex_df
filter(datcod, haul.id.size %in% ex_df$haul.id.size)
#> filter: removed all rows (100%)
#> # A tibble: 0 × 14
#> # … with 14 variables: density <dbl>, year <int>, lat <dbl>, lon <dbl>,
#> #   quarter <int>, Country <chr>, Month <int>, haul.id <chr>, IDx <chr>,
#> #   ices_rect <chr>, sub_div <chr>, length_cm <dbl>, species <chr>,
#> #   haul.id.size <chr>

# Bind_rows these data with datcod
# Expected number of rows after binding (catch rows + added length classes)
nrow(datcod) + nrow(ex_df)
#> [1] 1199744

# Check whether any density is NA before binding
unique(is.na(datcod$density))
#> [1] FALSE

# Stack the catch rows and the added length classes, sorted by haul and length
datcod <- bind_rows(datcod, ex_df) %>% arrange(haul.id, length_cm)
# Row count after binding, to compare with the expected number above
nrow(datcod)
#> [1] 1199744
datcod
#> # A tibble: 1,199,744 × 14
#>    density  year   lat   lon quarter Country Month haul.id IDx   ices_…¹ sub_div
#>      <dbl> <int> <dbl> <dbl>   <int> <chr>   <int> <chr>   <chr> <chr>   <chr>  
#>  1       0  1993  54.7    13       1 GFR         2 1993:1… 1993… 38G3    24     
#>  2      NA  1993  54.7    13       1 GFR         2 1993:1… 1993… 38G3    24     
#>  3      NA  1993  54.7    13       1 GFR         2 1993:1… 1993… 38G3    24     
#>  4      NA  1993  54.7    13       1 GFR         2 1993:1… 1993… 38G3    24     
#>  5      NA  1993  54.7    13       1 GFR         2 1993:1… 1993… 38G3    24     
#>  6      NA  1993  54.7    13       1 GFR         2 1993:1… 1993… 38G3    24     
#>  7      NA  1993  54.7    13       1 GFR         2 1993:1… 1993… 38G3    24     
#>  8      NA  1993  54.7    13       1 GFR         2 1993:1… 1993… 38G3    24     
#>  9      NA  1993  54.7    13       1 GFR         2 1993:1… 1993… 38G3    24     
#> 10      NA  1993  54.7    13       1 GFR         2 1993:1… 1993… 38G3    24     
#> # … with 1,199,734 more rows, 3 more variables: length_cm <dbl>, species <chr>,
#> #   haul.id.size <chr>, and abbreviated variable name ¹​ices_rect

# The rows that came from ex_df have no density (NA); they are length classes
# that were not in the catch data, so their density is set to 0
datcod <- mutate(datcod, density = replace_na(density, 0))
#> mutate: changed 979,954 values (82%) of 'density' (979954 fewer NA)

# Check the proportion zeroes are still correct:
# sum density over all length classes within each haul (one row per haul)
# NOTE(review): the name `t` masks base::t() for the rest of the script;
# consider a more descriptive name if nothing later relies on `t`.
t <- datcod %>%
  group_by(haul.id) %>%
  summarise(haul_density = sum(density)) %>% 
  ungroup()
#> group_by: one grouping variable (haul.id)
#> summarise: now 9,373 rows and 2 columns, ungrouped
#> ungroup: no grouping variables

# Total number of rows in the expanded cod data
nrow(datcod)
#> [1] 1199744
# Number of distinct hauls
length(unique(datcod$haul.id))
#> [1] 9373
# Dropping hauls with NA total density should remove nothing
t %>% drop_na(haul_density)
#> drop_na: no rows removed
#> # A tibble: 9,373 × 2
#>    haul.id                  haul_density
#>    <chr>                           <dbl>
#>  1 1993:1:GFR:SOL:H20:21:1        157.  
#>  2 1993:1:GFR:SOL:H20:22:32        10.3 
#>  3 1993:1:GFR:SOL:H20:23:31         5.23
#>  4 1993:1:GFR:SOL:H20:24:30        19.4 
#>  5 1993:1:GFR:SOL:H20:25:2        810.  
#>  6 1993:1:GFR:SOL:H20:26:3        363.  
#>  7 1993:1:GFR:SOL:H20:27:27        23.2 
#>  8 1993:1:GFR:SOL:H20:28:24      2302.  
#>  9 1993:1:GFR:SOL:H20:29:29        30.7 
#> 10 1993:1:GFR:SOL:H20:30:28         6.84
#> # … with 9,363 more rows
# Number of hauls in the per-haul summary
nrow(t)
#> [1] 9373
# Hauls with a non-zero total density; the number of removed rows is the number
# of zero-catch hauls
t %>%
  filter(haul_density != 0)
#> filter: removed 1,134 rows (12%), 8,239 rows remaining
#> # A tibble: 8,239 × 2
#>    haul.id                  haul_density
#>    <chr>                           <dbl>
#>  1 1993:1:GFR:SOL:H20:21:1        157.  
#>  2 1993:1:GFR:SOL:H20:22:32        10.3 
#>  3 1993:1:GFR:SOL:H20:23:31         5.23
#>  4 1993:1:GFR:SOL:H20:24:30        19.4 
#>  5 1993:1:GFR:SOL:H20:25:2        810.  
#>  6 1993:1:GFR:SOL:H20:26:3        363.  
#>  7 1993:1:GFR:SOL:H20:27:27        23.2 
#>  8 1993:1:GFR:SOL:H20:28:24      2302.  
#>  9 1993:1:GFR:SOL:H20:29:29        30.7 
#> 10 1993:1:GFR:SOL:H20:30:28         6.84
#> # … with 8,229 more rows


# Flounder
# Create a data frame with all combinations of trawl IDs and lengths
# (lengths in steps of 1 over the observed range of length_cm).
# stringsAsFactors = FALSE keeps haul.id as character, so the join and
# bind_rows() below do not depend on implicit factor -> character coercion;
# KEEP.OUT.ATTRS = FALSE returns a plain data frame without "out.attrs".
ex_df <- expand.grid(
  length_cm = seq_range(datfle$length_cm, by = 1),
  haul.id = unique(datfle$haul.id),
  KEEP.OUT.ATTRS = FALSE,
  stringsAsFactors = FALSE
)

# Create an ID that is haul + length
ex_df$haul.id.size <- paste(ex_df$haul.id, ex_df$length_cm, sep = ".")
datfle$haul.id.size <- paste(datfle$haul.id, datfle$length_cm, sep = ".")

# Remove IDs that are already in datfle
ex_df <- ex_df %>% filter(!haul.id.size %in% unique(datfle$haul.id.size))
#> filter: removed 111,868 rows (20%), 446,343 rows remaining

# Add in the other columns besides density and length (one row per haul)
dat_for_join <- datfle %>%
  dplyr::select(-density, -length_cm, -haul.id.size) %>%
  distinct(haul.id, .keep_all = TRUE)
#> distinct: removed 104,371 rows (92%), 9,151 rows remaining

ex_df <- left_join(ex_df, dat_for_join, by = "haul.id")
#> left_join: added 10 columns (year, lat, lon, quarter, Country, …)
#>            > rows only in x         0
#>            > rows only in y  (      0)
#>            > matched rows     446,343
#>            >                 =========
#>            > rows total       446,343

# Bind_rows these data with datfle
datfle <- bind_rows(datfle, ex_df) %>% arrange(haul.id, length_cm)

# Replace NA density with 0 density because that's the added length-classes not previously in the catch data
datfle <- datfle %>% mutate(density = replace_na(density, 0))
#> mutate: changed 446,343 values (80%) of 'density' (446343 fewer NA)


# Plaice
# Create a data frame with all combinations of trawl IDs and lengths
# (lengths in steps of 1 over the observed range of length_cm).
# stringsAsFactors = FALSE keeps haul.id as character, so the join and
# bind_rows() below do not depend on implicit factor -> character coercion;
# KEEP.OUT.ATTRS = FALSE returns a plain data frame without "out.attrs".
ex_df <- expand.grid(
  length_cm = seq_range(datpla$length_cm, by = 1),
  haul.id = unique(datpla$haul.id),
  KEEP.OUT.ATTRS = FALSE,
  stringsAsFactors = FALSE
)

# Create an ID that is haul + length
ex_df$haul.id.size <- paste(ex_df$haul.id, ex_df$length_cm, sep = ".")
datpla$haul.id.size <- paste(datpla$haul.id, datpla$length_cm, sep = ".")

# Remove IDs that are already in datpla
ex_df <- ex_df %>% filter(!haul.id.size %in% unique(datpla$haul.id.size))
#> filter: removed 49,816 rows (9%), 511,016 rows remaining

# Add in the other columns besides density and length (one row per haul)
dat_for_join <- datpla %>%
  dplyr::select(-density, -length_cm, -haul.id.size) %>%
  distinct(haul.id, .keep_all = TRUE)
#> distinct: removed 41,213 rows (82%), 8,763 rows remaining

ex_df <- left_join(ex_df, dat_for_join, by = "haul.id")
#> left_join: added 10 columns (year, lat, lon, quarter, Country, …)
#>            > rows only in x         0
#>            > rows only in y  (      0)
#>            > matched rows     511,016
#>            >                 =========
#>            > rows total       511,016

# Bind_rows these data with datpla
datpla <- bind_rows(datpla, ex_df) %>% arrange(haul.id, length_cm)

# Replace NA density with 0 density because that's the added length-classes not previously in the catch data
datpla <- datpla %>% mutate(density = replace_na(density, 0))
#> mutate: changed 511,016 values (91%) of 'density' (511016 fewer NA)


# Dab
# Create a data frame with all combinations of trawl IDs and lengths
# (lengths in steps of 1 over the observed range of length_cm).
# stringsAsFactors = FALSE keeps haul.id as character, so the join and
# bind_rows() below do not depend on implicit factor -> character coercion;
# KEEP.OUT.ATTRS = FALSE returns a plain data frame without "out.attrs".
ex_df <- expand.grid(
  length_cm = seq_range(datdab$length_cm, by = 1),
  haul.id = unique(datdab$haul.id),
  KEEP.OUT.ATTRS = FALSE,
  stringsAsFactors = FALSE
)

# Create an ID that is haul + length
ex_df$haul.id.size <- paste(ex_df$haul.id, ex_df$length_cm, sep = ".")
datdab$haul.id.size <- paste(datdab$haul.id, datdab$length_cm, sep = ".")

# Remove IDs that are already in datdab
ex_df <- ex_df %>% filter(!haul.id.size %in% unique(datdab$haul.id.size))
#> filter: removed 19,233 rows (6%), 322,251 rows remaining

# Add in the other columns besides density and length (one row per haul)
dat_for_join <- datdab %>%
  dplyr::select(-density, -length_cm, -haul.id.size) %>%
  distinct(haul.id, .keep_all = TRUE)
#> distinct: removed 10,477 rows (54%), 8,756 rows remaining

ex_df <- left_join(ex_df, dat_for_join, by = "haul.id")
#> left_join: added 10 columns (year, lat, lon, quarter, Country, …)
#>            > rows only in x         0
#>            > rows only in y  (      0)
#>            > matched rows     322,251
#>            >                 =========
#>            > rows total       322,251

# Bind_rows these data with datdab
datdab <- bind_rows(datdab, ex_df) %>% arrange(haul.id, length_cm)

# Replace NA density with 0 density because that's the added length-classes not previously in the catch data
datdab <- datdab %>% mutate(density = replace_na(density, 0))
#> mutate: changed 322,251 values (94%) of 'density' (322251 fewer NA)

# Stack the cod, flounder, plaice and dab data into one data frame
dat <- list(datcod, datfle, datpla, datdab) %>%
  bind_rows()

# Overview of the columns and their types
glimpse(dat)
#> Rows: 2,662,085
#> Columns: 14
#> $ density      <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.…
#> $ year         <int> 1993, 1993, 1993, 1993, 1993, 1993, 1993, 1993, 1993, 199…
#> $ lat          <dbl> 54.6833, 54.6833, 54.6833, 54.6833, 54.6833, 54.6833, 54.…
#> $ lon          <dbl> 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 1…
#> $ quarter      <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
#> $ Country      <chr> "GFR", "GFR", "GFR", "GFR", "GFR", "GFR", "GFR", "GFR", "…
#> $ Month        <int> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
#> $ haul.id      <chr> "1993:1:GFR:SOL:H20:21:1", "1993:1:GFR:SOL:H20:21:1", "19…
#> $ IDx          <chr> "1993.1.GFR.06S1.H20.21.1", "1993.1.GFR.06S1.H20.21.1", "…
#> $ ices_rect    <chr> "38G3", "38G3", "38G3", "38G3", "38G3", "38G3", "38G3", "…
#> $ sub_div      <chr> "24", "24", "24", "24", "24", "24", "24", "24", "24", "24…
#> $ length_cm    <dbl> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,…
#> $ species      <chr> "cod", "cod", "cod", "cod", "cod", "cod", "cod", "cod", "…
#> $ haul.id.size <chr> "1993:1:GFR:SOL:H20:21:1.0", "1993:1:GFR:SOL:H20:21:1.1",…

# Check proportion zeroes (quarter 1).
# A haul is flagged as a zero catch ("Y") for a species when its density summed
# over all rows of that haul is 0. `n` counts rows per year, species and flag,
# and prop_z is the share of rows that belong to zero-catch hauls.
q1 <- dat %>% 
  filter(quarter == 1) %>% 
  group_by(species, haul.id) %>% 
  mutate(zero_catch = ifelse(sum(density) == 0, "Y", "N")) %>% 
  ungroup() %>% 
  group_by(year, species, zero_catch) %>% 
  summarise(n = n()) %>% 
  ungroup() %>% 
  # A year-species combination that lacks one of the two flags has no row for
  # it; fill that count with 0 so prop_z becomes 0 or 1 instead of NA
  pivot_wider(names_from = zero_catch, values_from = n, values_fill = 0L) %>% 
  mutate(prop_z = Y / (N + Y), 
         q = 1)  
#> filter: removed 1,091,659 rows (41%), 1,570,426 rows remaining
#> group_by: 2 grouping variables (species, haul.id)
#> mutate (grouped): new variable 'zero_catch' (character) with 2 unique values and 0% NA
#> ungroup: no grouping variables
#> group_by: 3 grouping variables (year, species, zero_catch)
#> summarise: now 224 rows and 4 columns, 2 group variables remaining (year, species)
#> ungroup: no grouping variables
#> pivot_wider: reorganized (zero_catch, n) into (N, Y) [was 224x4, now 112x4]
#> mutate: new variable 'prop_z' (double) with 112 unique values and 0% NA
#>         new variable 'q' (double) with one unique value and 0% NA

# Proportion of zeroes in quarter 4.
# A haul is flagged as a zero catch ("Y") for a species when its density summed
# over all rows of that haul is 0. `n` counts rows per year, species and flag,
# and prop_z is the share of rows that belong to zero-catch hauls.
q4 <- dat %>% 
  filter(quarter == 4) %>% 
  group_by(species, haul.id) %>% 
  mutate(zero_catch = ifelse(sum(density) == 0, "Y", "N")) %>% 
  ungroup() %>% 
  group_by(year, species, zero_catch) %>% 
  summarise(n = n()) %>% 
  ungroup() %>% 
  # A year-species combination that lacks one of the two flags has no row for
  # it; fill that count with 0 so prop_z becomes 0 or 1 instead of NA
  pivot_wider(names_from = zero_catch, values_from = n, values_fill = 0L) %>% 
  mutate(prop_z = Y / (N + Y), 
         q = 4)  
#> filter: removed 1,603,130 rows (60%), 1,058,955 rows remaining
#> group_by: 2 grouping variables (species, haul.id)
#> mutate (grouped): new variable 'zero_catch' (character) with 2 unique values and 0% NA
#> ungroup: no grouping variables
#> group_by: 3 grouping variables (year, species, zero_catch)
#> summarise: now 223 rows and 4 columns, 2 group variables remaining (year, species)
#> ungroup: no grouping variables
#> pivot_wider: reorganized (zero_catch, n) into (N, Y) [was 223x4, now 112x4]
#> mutate: new variable 'prop_z' (double) with 110 unique values and 1% NA
#>         new variable 'q' (double) with one unique value and 0% NA

# Percentage of zero catches over time, one line per quarter, one panel per species
ggplot(
  data = bind_rows(q1, q4),
  mapping = aes(x = year, y = prop_z * 100, color = factor(q))
) +
  geom_line() +
  facet_wrap(~ species, ncol = 1)


# Save the full catch data as dat_full. For adding the unique covariates, I only need the distinct trawl IDs, not the 1.7 million rows...
# dat_full <- dat
# dat <- dat %>% dplyr::select(-density, length_cm, species, haul.id.size) %>% distinct(haul.id, .keep_all = TRUE)
# 
# # Check 0 catches
# dat_full %>%
#   filter(species == "cod") %>%
#   group_by(haul.id) %>%
#   summarise(haul_dens = sum(density)) %>%
#   ungroup() %>%
#   filter(!haul_dens == 0)

Add in the environmental variables

Substrate

# substrate <- raster("data/substrate_tif/BALANCE_SEABED_SEDIMENT.tif")
# substrate_longlat = projectRaster(substrate, crs = ('+proj=longlat'))
# 
# # Now extract the values from the substrate raster to dat
# dat$substrate <- extract(substrate_longlat, dat %>% dplyr::select(lon, lat))
# 
# unique(dat$substrate)
# 
# # Plot
# ggplot(dat, aes(lon, lat, color = substrate)) + 
#   geom_point()
# 
# factor(sort(unique(round(dat$substrate))))
# 
# dat$substrate <- round(dat$substrate)
# 
# dat <- dat %>% mutate(substrate = ifelse(substrate == 1, "bedrock", substrate),
#                       substrate = ifelse(substrate == 2, "hard-bottom complex", substrate),
#                       substrate = ifelse(substrate == 3, "sand", substrate),
#                       substrate = ifelse(substrate == 4, "hard clay", substrate),
#                       substrate = ifelse(substrate == 5, "mud", substrate))
# # I. Bedrock.
# # II. Hard bottom complex, includes patchy hard surfaces and coarse sand (sometimes also clay) to boulders.
# # III. Sand including fine to coarse sand (with gravel exposures).
# # IV. Hard clay sometimes/often/possibly exposed or covered with a thin layer of
# # sand/gravel.
# # V. Mud including gyttja-clay to gyttja-silt.
# 
# # Plot
# ggplot(dat, aes(lon, lat, color = substrate)) + 
#   geom_point()

Depth

# Work on one row per haul (first row of each haul.id) and keep only position,
# year and quarter; extracting covariates for every catch row would take forever
dat_haul <- dat %>%
  distinct(haul.id, .keep_all = TRUE) %>%
  dplyr::select(lat, lon, year, quarter)
#> distinct: removed 2,652,709 rows (>99%), 9,376 rows remaining

# Read the depth raster from a netCDF file with terra and plot it as a check.
# Background on reading netCDF layers with terra, and the data portal:
# https://gis.stackexchange.com/questions/411261/read-multiple-layers-raster-from-ncdf-file-using-terra-package
# https://emodnet.ec.europa.eu/geoviewer/
dep_raster <- terra::rast("data/Mean depth natural colour (with land).nc")
# Confirm the object class (recorded output below: SpatRaster)
class(dep_raster)
#> [1] "SpatRaster"
#> attr(,"package")
#> [1] "terra"
plot(dep_raster)


# Look up the raster's `elevation` value at each haul position (lon, lat)
haul_xy <- dat_haul %>% dplyr::select(lon, lat)
dat_haul$depth <- terra::extract(dep_raster, haul_xy)$elevation

# Map the hauls coloured by sign-flipped elevation as a visual check
ggplot(dat_haul, aes(x = lon, y = lat, color = depth * -1)) +
  geom_point() +
  scale_color_viridis(direction = -1)


# Store depth with the sign flipped relative to the raster's elevation
dat_haul$depth <- -1 * dat_haul$depth

Oxygen

# Data source:
# https://resources.marine.copernicus.eu/?option=com_csw&view=details&product_id=BALTICSEA_REANALYSIS_BIO_003_012
# Guides followed for reading netCDF and extracting raster values at points:
# https://gisday.wordpress.com/2014/03/24/extract-raster-values-from-points-using-r/comment-page-1/
# https://rpubs.com/boyerag/297592
# https://pjbartlein.github.io/REarthSysSci/netCDF.html#get-a-variable

# Open the oxygen netCDF file and print its header (variables, dimensions, attributes)
ncin <- nc_open(
  filename = "data/NEMO_Nordic_SCOBI/dataset-reanalysis-scobi-monthlymeans_1664182224542.nc"
)

print(ncin)
#> File data/NEMO_Nordic_SCOBI/dataset-reanalysis-scobi-monthlymeans_1664182224542.nc (NC_FORMAT_CLASSIC):
#> 
#>      1 variables (excluding dimension variables):
#>         float o2b[longitude,latitude,time]   
#>             long_name: Sea_floor_Dissolved_Oxygen_Concentration
#>             missing_value: NaN
#>             standard_name: mole_concentration_of_dissolved_molecular_oxygen_in_sea_water
#>             units: mmol m-3
#>             _FillValue: NaN
#>             _ChunkSizes: 1
#>              _ChunkSizes: 523
#>              _ChunkSizes: 383
#> 
#>      3 dimensions:
#>         time  Size:336
#>             axis: T
#>             long_name: Validity time
#>             standard_name: time
#>             units: days since 1950-01-01 00:00:00
#>             calendar: gregorian
#>             _ChunkSizes: 512
#>             _CoordinateAxisType: Time
#>             valid_min: 15721.5
#>             valid_max: 25917.5
#>         latitude  Size:523
#>             axis: Y
#>             standard_name: latitude
#>             long_name: latitude
#>             units: degrees_north
#>             _CoordinateAxisType: Lat
#>             valid_min: 48.49169921875
#>             valid_max: 65.8914184570312
#>         longitude  Size:383
#>             standard_name: longitude
#>             long_name: longitude
#>             units: degrees_east
#>             axis: X
#>             _CoordinateAxisType: Lon
#>             valid_min: 9.01375484466553
#>             valid_max: 30.2357654571533
#> 
#>     24 global attributes:
#>         references: http://www.smhi.se
#>         institution: Swedish Meterological and Hydrological Institute
#>         history: See source and creation_date attributees
#>         Conventions: CF-1.5
#>         contact: servicedesk_cmems@mercator-ocean.eu
#>         comment: Provided by SMHI as a Copernicus Marine Environment Monitoring Service production unit
#>         bullentin_type: reanalysis
#>         cmems_product_id: BALTICSEA_REANALYSIS_BIO_003_012
#>         title: CMEMS V4 Reanalysis: SCOBI model 3D fields (monthly means)
#>         FROM_ORIGINAL_FILE__easternmost_longitude: 30.2357654571533
#>         FROM_ORIGINAL_FILE__northernmost_latitude: 65.8914184570312
#>         FROM_ORIGINAL_FILE__westernmost_longitude: 9.01375484466553
#>         FROM_ORIGINAL_FILE__southernmost_latitude: 48.49169921875
#>         shallowest_depth: 1.50136542320251
#>         deepest_depth: 711.059204101562
#>         source: SMHI reanalysis run NORDIC-NS2_1d_20201201_20201201
#>         file_quality_index: 1
#>         creation_date: 2021-11-09 UTC
#>         bullentin_date: 20201201
#>         start_date: 2020-12-01 UTC
#>         stop_date: 2020-12-01 UTC
#>         start_time: 00:00 UTC
#>         stop_time: 00:00 UTC
#>         _CoordSysBuilder: ucar.nc2.dataset.conv.CF1Convention

# Read the coordinate vectors of the grid and their lengths
lon <- ncvar_get(nc = ncin, varid = "longitude")
nlon <- dim(lon)
head(lon)
#> [1] 9.013755 9.069310 9.124865 9.180420 9.235975 9.291530

lat <- ncvar_get(nc = ncin, varid = "latitude")
nlat <- dim(lat)
head(lat)
#> [1] 48.49170 48.52503 48.55836 48.59170 48.62503 48.65836

# Read the time axis (numeric offsets; the unit is read separately) and print it
time <- ncvar_get(nc = ncin, varid = "time")
time
#>   [1] 15721.5 15751.0 15780.5 15811.0 15841.5 15872.0 15902.5 15933.5 15964.0
#>  [10] 15994.5 16025.0 16055.5 16086.5 16116.0 16145.5 16176.0 16206.5 16237.0
#>  [19] 16267.5 16298.5 16329.0 16359.5 16390.0 16420.5 16451.5 16481.0 16510.5
#>  [28] 16541.0 16571.5 16602.0 16632.5 16663.5 16694.0 16724.5 16755.0 16785.5
#>  [37] 16816.5 16846.5 16876.5 16907.0 16937.5 16968.0 16998.5 17029.5 17060.0
#>  [46] 17090.5 17121.0 17151.5 17182.5 17212.0 17241.5 17272.0 17302.5 17333.0
#>  [55] 17363.5 17394.5 17425.0 17455.5 17486.0 17516.5 17547.5 17577.0 17606.5
#>  [64] 17637.0 17667.5 17698.0 17728.5 17759.5 17790.0 17820.5 17851.0 17881.5
#>  [73] 17912.5 17942.0 17971.5 18002.0 18032.5 18063.0 18093.5 18124.5 18155.0
#>  [82] 18185.5 18216.0 18246.5 18277.5 18307.5 18337.5 18368.0 18398.5 18429.0
#>  [91] 18459.5 18490.5 18521.0 18551.5 18582.0 18612.5 18643.5 18673.0 18702.5
#> [100] 18733.0 18763.5 18794.0 18824.5 18855.5 18886.0 18916.5 18947.0 18977.5
#> [109] 19008.5 19038.0 19067.5 19098.0 19128.5 19159.0 19189.5 19220.5 19251.0
#> [118] 19281.5 19312.0 19342.5 19373.5 19403.0 19432.5 19463.0 19493.5 19524.0
#> [127] 19554.5 19585.5 19616.0 19646.5 19677.0 19707.5 19738.5 19768.5 19798.5
#> [136] 19829.0 19859.5 19890.0 19920.5 19951.5 19982.0 20012.5 20043.0 20073.5
#> [145] 20104.5 20134.0 20163.5 20194.0 20224.5 20255.0 20285.5 20316.5 20347.0
#> [154] 20377.5 20408.0 20438.5 20469.5 20499.0 20528.5 20559.0 20589.5 20620.0
#> [163] 20650.5 20681.5 20712.0 20742.5 20773.0 20803.5 20834.5 20864.0 20893.5
#> [172] 20924.0 20954.5 20985.0 21015.5 21046.5 21077.0 21107.5 21138.0 21168.5
#> [181] 21199.5 21229.5 21259.5 21290.0 21320.5 21351.0 21381.5 21412.5 21443.0
#> [190] 21473.5 21504.0 21534.5 21565.5 21595.0 21624.5 21655.0 21685.5 21716.0
#> [199] 21746.5 21777.5 21808.0 21838.5 21869.0 21899.5 21930.5 21960.0 21989.5
#> [208] 22020.0 22050.5 22081.0 22111.5 22142.5 22173.0 22203.5 22234.0 22264.5
#> [217] 22295.5 22325.0 22354.5 22385.0 22415.5 22446.0 22476.5 22507.5 22538.0
#> [226] 22568.5 22599.0 22629.5 22660.5 22690.5 22720.5 22751.0 22781.5 22812.0
#> [235] 22842.5 22873.5 22904.0 22934.5 22965.0 22995.5 23026.5 23056.0 23085.5
#> [244] 23116.0 23146.5 23177.0 23207.5 23238.5 23269.0 23299.5 23330.0 23360.5
#> [253] 23391.5 23421.0 23450.5 23481.0 23511.5 23542.0 23572.5 23603.5 23634.0
#> [262] 23664.5 23695.0 23725.5 23756.5 23786.0 23815.5 23846.0 23876.5 23907.0
#> [271] 23937.5 23968.5 23999.0 24029.5 24060.0 24090.5 24121.5 24151.5 24181.5
#> [280] 24212.0 24242.5 24273.0 24303.5 24334.5 24365.0 24395.5 24426.0 24456.5
#> [289] 24487.5 24517.0 24546.5 24577.0 24607.5 24638.0 24668.5 24699.5 24730.0
#> [298] 24760.5 24791.0 24821.5 24852.5 24882.0 24911.5 24942.0 24972.5 25003.0
#> [307] 25033.5 25064.5 25095.0 25125.5 25156.0 25186.5 25217.5 25247.0 25276.5
#> [316] 25307.0 25337.5 25368.0 25398.5 25429.5 25460.0 25490.5 25521.0 25551.5
#> [325] 25582.5 25612.5 25642.5 25673.0 25703.5 25734.0 25764.5 25795.5 25826.0
#> [334] 25856.5 25887.0 25917.5

# Unit of the time axis and number of time steps
tunits <- ncatt_get(nc = ncin, varid = "time", attname = "units")
nt <- dim(time)
nt
#> [1] 336
tunits
#> $hasatt
#> [1] TRUE
#> 
#> $value
#> [1] "days since 1950-01-01 00:00:00"

# Read the oxygen variable (o2b) as an array, together with its descriptive
# attributes and its fill value
dname <- "o2b"

oxy_array <- ncvar_get(nc = ncin, varid = dname)
dlname <- ncatt_get(nc = ncin, varid = dname, attname = "long_name")
dunits <- ncatt_get(nc = ncin, varid = dname, attname = "units")
fillvalue <- ncatt_get(nc = ncin, varid = dname, attname = "_FillValue")
dim(oxy_array)
#> [1] 383 523 336

# Get global attributes (varid 0 addresses the file-level attributes)
title <- ncatt_get(ncin,0,"title")
institution <- ncatt_get(ncin,0,"institution")
datasource <- ncatt_get(ncin,0,"source")
references <- ncatt_get(ncin,0,"references")
history <- ncatt_get(ncin,0,"history")
Conventions <- ncatt_get(ncin,0,"Conventions")

# Everything needed from the oxygen file is now in memory. Close the handle so
# it is not left open when `ncin` is reused for the temperature file.
nc_close(ncin)

# Convert time: the units string reads "days since YYYY-MM-DD hh:mm:ss", so the
# third space-separated field is the origin date; split it into year, month, day
tustr <- strsplit(tunits$value, split = " ")
tdstr <- strsplit(tustr[[1]][3], split = "-")
origin_ymd <- as.integer(tdstr[[1]])
tyear <- origin_ymd[1]
tmonth <- origin_ymd[2]
tday <- origin_ymd[3]

# Here I deviate from the guide a little bit. Save this info:
dates <- chron(time, origin = c(tmonth, tday, tyear))

# Cut month and two-digit year out of the text form of the dates
# (assumes chron's "(mm/dd/yy hh:mm:ss)" text layout -- TODO confirm)
dates_chr <- as.character(dates)
months <- as.numeric(substr(dates_chr, 2, 3))
years <- as.numeric(substr(dates_chr, 8, 9))
# Two-digit years above 90 are read as 19xx, all others as 20xx
years <- ifelse(years > 90, 1900 + years, 2000 + years)

# Replace netCDF fill values with NA's. The fill value of this variable is NaN,
# and `x == NaN` is always NA, so an equality test can never select anything;
# test for NaN explicitly instead.
oxy_array[is.nan(oxy_array)] <- NA

# Next, we need to work with the months that correspond to the quarters that we use.
# loop through each time step, and if it is a good month save it as a raster.
# First get the index of months that correspond to Q1 (months 1-3) and Q4 (months 10-12)
months
#>   [1]  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1
#>  [26]  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2
#>  [51]  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3
#>  [76]  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4
#> [101]  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5
#> [126]  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6
#> [151]  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7
#> [176]  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8
#> [201]  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9
#> [226] 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10
#> [251] 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11
#> [276] 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12
#> [301]  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1
#> [326]  2  3  4  5  6  7  8  9 10 11 12

# Quarter 1: positions of the time steps in months 1-3, the matching month and
# year labels, and the corresponding layers of the oxygen array
index_keep_q1 <- which(months < 4)
months_keep_q1 <- months[index_keep_q1]
years_keep_q1 <- years[index_keep_q1]
oxy_q1 <- oxy_array[, , index_keep_q1]

# Quarter 4: same for months 10-12
index_keep_q4 <- which(months > 9)
months_keep_q4 <- months[index_keep_q4]
years_keep_q4 <- years[index_keep_q4]
oxy_q4 <- oxy_array[, , index_keep_q4]

# Now we have one array per quarter that holds only that quarter's monthly
# layers, three consecutive layers per year. Average those three layers within
# each year to get one quarterly-mean matrix per year.

# Position of the first monthly layer of each year (1, 4, 7, ...)
loop_seq_q1 <- seq(1, dim(oxy_q1)[3], by = 3)
loop_seq_q4 <- seq(1, dim(oxy_q4)[3], by = 3)

# The loop walks both quarters with the same positions, which is only valid if
# both arrays hold the same number of complete three-month runs
stopifnot(
  dim(oxy_q1)[3] == dim(oxy_q4)[3],
  dim(oxy_q1)[3] %% 3 == 0
)

# One list element per year, filled in the loop
dlist_q1 <- vector("list", length(loop_seq_q1))
dlist_q4 <- vector("list", length(loop_seq_q4))

for (list_pos in seq_along(loop_seq_q1)) {

  # First of this year's three monthly layers; i + 1 and i + 2 are the others
  i <- loop_seq_q1[list_pos]

  oxy_1 <- oxy_q1[, , i]
  oxy_2 <- oxy_q1[, , i + 1]
  oxy_3 <- oxy_q1[, , i + 2]

  oxy_10 <- oxy_q4[, , i]
  oxy_11 <- oxy_q4[, , i + 1]
  oxy_12 <- oxy_q4[, , i + 2]

  oxy_ave_q1 <- (oxy_1 + oxy_2 + oxy_3) / 3
  oxy_ave_q4 <- (oxy_10 + oxy_11 + oxy_12) / 3

  # list_pos is an integer 1:n(years); using it directly avoids deriving the
  # list index from floating-point division of i
  dlist_q1[[list_pos]] <- oxy_ave_q1
  dlist_q4[[list_pos]] <- oxy_ave_q4

}

# Now name the lists with the year:
names(dlist_q1) <- unique(years_keep_q1)
names(dlist_q4) <- unique(years_keep_q4)

# Next step: extract the raster value at the haul positions, year by year.
# The haul-level cpue data is called dat_haul so far in this script.

# Per quarter, keep only the hauls from years that have an oxygen layer
d_sub_oxy_q1 <- dat_haul %>%
  filter(quarter == 1) %>%
  filter(year %in% names(dlist_q1)) %>%
  droplevels()
#> filter: removed 3,779 rows (40%), 5,597 rows remaining
#> filter: no rows removed
d_sub_oxy_q4 <- dat_haul %>%
  filter(quarter == 4) %>%
  filter(year %in% names(dlist_q4)) %>%
  droplevels()
#> filter: removed 5,709 rows (61%), 3,667 rows remaining
#> filter: no rows removed

# Empty containers: extracted oxygen values per year ...
oxy_data_list_q1 <- list()
oxy_data_list_q4 <- list()

# ... and the oxygen rasters
raster_list_q1 <- list()
raster_list_q4 <- list()

# Year as a factor, used to pick the matching list element in the loop
d_sub_oxy_q1$year_f <- as.factor(d_sub_oxy_q1$year)
d_sub_oxy_q4$year_f <- as.factor(d_sub_oxy_q4$year)

# Loop through each year and extract raster values for the cpue data points.
# The years are taken from the Q1 hauls and reused for Q4, so this assumes both
# quarters have hauls in the same years.

# Set plot limits (constant, so defined once rather than in every iteration)
ymin <- 54
ymax <- 58
xmin <- 12
xmax <- 22

# Geographic WGS84 coordinates, written as a well-formed PROJ string
oxy_crs <- CRS("+proj=longlat +datum=WGS84 +no_defs")

for (i in unique(d_sub_oxy_q1$year_f)) {

  # `i` is the year as a character string (a factor is iterated as character),
  # so it selects list elements by name

  # Subset a year
  oxy_slice_q1 <- dlist_q1[[i]]
  oxy_slice_q4 <- dlist_q4[[i]]

  # Create raster for that year (i). The slice is [longitude, latitude], hence
  # the transpose.
  # NOTE(review): the extent is set to min/max of the lon/lat vectors; if those
  # are cell centres the extent is half a cell short on each side -- confirm.
  r_q1 <- raster(t(oxy_slice_q1), xmn = min(lon), xmx = max(lon), ymn = min(lat), ymx = max(lat),
                 crs = oxy_crs)
  r_q4 <- raster(t(oxy_slice_q4), xmn = min(lon), xmx = max(lon), ymn = min(lat), ymx = max(lat),
                 crs = oxy_crs)

  # Flip vertically (latitude is stored in increasing order, raster rows run
  # from the top)
  r_q1 <- flip(r_q1, direction = "y")
  r_q4 <- flip(r_q4, direction = "y")

  plot(r_q1, main = paste(i, "Q1"))
  plot(r_q4, main = paste(i, "Q4"))

  # Filter the same year (i) in the cpue data and select only coordinates
  d_slice_q1 <- d_sub_oxy_q1 %>% filter(year_f == i) %>% dplyr::select(lon, lat)
  d_slice_q4 <- d_sub_oxy_q4 %>% filter(year_f == i) %>% dplyr::select(lon, lat)

  # Make into a SpatialPoints object
  data_sp_q1 <- SpatialPoints(d_slice_q1)
  data_sp_q4 <- SpatialPoints(d_slice_q4)

  # Extract raster value (oxygen) at each haul position
  rasValue_q1 <- raster::extract(r_q1, data_sp_q1)
  rasValue_q4 <- raster::extract(r_q4, data_sp_q4)

  # Add in the raster value in the df holding the coordinates for the cpue data
  d_slice_q1$oxy <- rasValue_q1
  d_slice_q4$oxy <- rasValue_q4

  # Add in which year (stored as the character label of the loop index)
  d_slice_q1$year <- i
  d_slice_q4$year <- i

  # Now the unit of oxygen is mmol/m3. I want it to be ml/L. The original model is in unit ml/L
  # and it's been converted by the data host. Since it was converted without accounting for
  # pressure or temperature, I can simply use the following conversion factor:
  # 1 ml/l = 10^3/22.391 = 44.661 μmol/l -> 1 ml/l = 0.044661 mmol/l = 44.661 mmol/m^3 -> 0.0223909 ml/l = 1 mmol/m^3
  # https://ocean.ices.dk/tools/unitconversion.aspx

  d_slice_q1$oxy <- d_slice_q1$oxy * 0.0223909
  d_slice_q4$oxy <- d_slice_q4$oxy * 0.0223909

  # Store each year's data under its year name. This avoids a hard-coded
  # first-year offset and an NA index if a quarter has no hauls in a year.
  oxy_data_list_q1[[i]] <- d_slice_q1
  oxy_data_list_q4[[i]] <- d_slice_q4

}

#> filter: removed 5,496 rows (98%), 101 rows remaining
#> filter: removed 3,606 rows (98%), 61 rows remaining

#> filter: removed 5,422 rows (97%), 175 rows remaining
#> filter: removed 3,605 rows (98%), 62 rows remaining

#> filter: removed 5,443 rows (97%), 154 rows remaining
#> filter: removed 3,614 rows (99%), 53 rows remaining

#> filter: removed 5,414 rows (97%), 183 rows remaining
#> filter: removed 3,606 rows (98%), 61 rows remaining

#> filter: removed 5,383 rows (96%), 214 rows remaining
#> filter: removed 3,592 rows (98%), 75 rows remaining

#> filter: removed 5,369 rows (96%), 228 rows remaining
#> filter: removed 3,599 rows (98%), 68 rows remaining

#> filter: removed 5,407 rows (97%), 190 rows remaining
#> filter: removed 3,573 rows (97%), 94 rows remaining

#> filter: removed 5,487 rows (98%), 110 rows remaining
#> filter: removed 3,578 rows (98%), 89 rows remaining

#> filter: removed 5,389 rows (96%), 208 rows remaining
#> filter: removed 3,551 rows (97%), 116 rows remaining

#> filter: removed 5,452 rows (97%), 145 rows remaining
#> filter: removed 3,550 rows (97%), 117 rows remaining

#> filter: removed 5,436 rows (97%), 161 rows remaining
#> filter: removed 3,539 rows (97%), 128 rows remaining

#> filter: removed 5,390 rows (96%), 207 rows remaining
#> filter: removed 3,558 rows (97%), 109 rows remaining

#> filter: removed 5,379 rows (96%), 218 rows remaining
#> filter: removed 3,516 rows (96%), 151 rows remaining

#> filter: removed 5,409 rows (97%), 188 rows remaining
#> filter: removed 3,517 rows (96%), 150 rows remaining

#> filter: removed 5,362 rows (96%), 235 rows remaining
#> filter: removed 3,498 rows (95%), 169 rows remaining

#> filter: removed 5,375 rows (96%), 222 rows remaining
#> filter: removed 3,491 rows (95%), 176 rows remaining

#> filter: removed 5,338 rows (95%), 259 rows remaining
#> filter: removed 3,487 rows (95%), 180 rows remaining

#> filter: removed 5,356 rows (96%), 241 rows remaining
#> filter: removed 3,505 rows (96%), 162 rows remaining

#> filter: removed 5,354 rows (96%), 243 rows remaining
#> filter: removed 3,487 rows (95%), 180 rows remaining

#> filter: removed 5,373 rows (96%), 224 rows remaining
#> filter: removed 3,534 rows (96%), 133 rows remaining

#> filter: removed 5,329 rows (95%), 268 rows remaining
#> filter: removed 3,519 rows (96%), 148 rows remaining

#> filter: removed 5,389 rows (96%), 208 rows remaining
#> filter: removed 3,491 rows (95%), 176 rows remaining

#> filter: removed 5,360 rows (96%), 237 rows remaining
#> filter: removed 3,501 rows (95%), 166 rows remaining

#> filter: removed 5,365 rows (96%), 232 rows remaining
#> filter: removed 3,454 rows (94%), 213 rows remaining

#> filter: removed 5,351 rows (96%), 246 rows remaining
#> filter: removed 3,442 rows (94%), 225 rows remaining

#> filter: removed 5,341 rows (95%), 256 rows remaining
#> filter: removed 3,456 rows (94%), 211 rows remaining

#> filter: removed 5,479 rows (98%), 118 rows remaining
#> filter: removed 3,575 rows (97%), 92 rows remaining

#> filter: removed 5,471 rows (98%), 126 rows remaining
#> filter: removed 3,565 rows (97%), 102 rows remaining


# Collapse the per-year lists into one data frame per quarter, then stack the
# two quarters with a column saying which quarter each row belongs to
big_dat_oxy_q1 <- oxy_data_list_q1 %>% dplyr::bind_rows()
big_dat_oxy_q4 <- oxy_data_list_q4 %>% dplyr::bind_rows()
big_dat_oxy <- bind_rows(
  big_dat_oxy_q1 %>% mutate(quarter = 1),
  big_dat_oxy_q4 %>% mutate(quarter = 4)
)
#> mutate: new variable 'quarter' (double) with one unique value and 0% NA
#> mutate: new variable 'quarter' (double) with one unique value and 0% NA

Temperature

# Open the temperature netCDF file and print its header
ncin <- nc_open(
  filename = "data/NEMO_Nordic_SCOBI/dataset-reanalysis-nemo-monthlymeans_1664183191233.nc"
)

print(ncin)
#> File data/NEMO_Nordic_SCOBI/dataset-reanalysis-nemo-monthlymeans_1664183191233.nc (NC_FORMAT_CLASSIC):
#> 
#>      1 variables (excluding dimension variables):
#>         float bottomT[longitude,latitude,time]   
#>             standard_name: sea_water_potential_temperature_at_sea_floor
#>             units: degrees_C
#>             long_name: Sea floor potential temperature
#>             missing_value: NaN
#>             _FillValue: NaN
#>             _ChunkSizes: 1
#>              _ChunkSizes: 523
#>              _ChunkSizes: 383
#> 
#>      3 dimensions:
#>         time  Size:336
#>             axis: T
#>             long_name: Validity time
#>             standard_name: time
#>             units: days since 1950-01-01 00:00:00
#>             calendar: gregorian
#>             _ChunkSizes: 512
#>             _CoordinateAxisType: Time
#>             valid_min: 15721.5
#>             valid_max: 25917.5
#>         latitude  Size:523
#>             axis: Y
#>             standard_name: latitude
#>             long_name: latitude
#>             units: degrees_north
#>             _CoordinateAxisType: Lat
#>             valid_min: 48.49169921875
#>             valid_max: 65.8914184570312
#>         longitude  Size:383
#>             standard_name: longitude
#>             long_name: longitude
#>             units: degrees_east
#>             axis: X
#>             _CoordinateAxisType: Lon
#>             valid_min: 9.01375484466553
#>             valid_max: 30.2357654571533
#> 
#>     24 global attributes:
#>         references: http://www.smhi.se
#>         institution: Swedish Meterological and Hydrological Institute
#>         history: See source and creation_date attributees
#>         Conventions: CF-1.5
#>         contact: servicedesk_cmems@mercator-ocean.eu
#>         comment: Provided by SMHI as a Copernicus Marine Environment Monitoring Service production unit
#>         bullentin_type: reanalysis
#>         cmems_product_id: BALTICSEA_REANALYSIS_PHY_003_011
#>         title: CMEMS V4 Reanalysis: NEMO model 3D fields (monthly means)
#>         FROM_ORIGINAL_FILE__easternmost_longitude: 30.2357654571533
#>         FROM_ORIGINAL_FILE__northernmost_latitude: 65.8914184570312
#>         FROM_ORIGINAL_FILE__westernmost_longitude: 9.01375484466553
#>         FROM_ORIGINAL_FILE__southernmost_latitude: 48.49169921875
#>         shallowest_depth: 1.50136542320251
#>         deepest_depth: 711.059204101562
#>         source: SMHI reanalysis run NORDIC-NS2_1d_20201201_20201201
#>         file_quality_index: 1
#>         creation_date: 2021-11-09 UTC
#>         bullentin_date: 20201201
#>         start_date: 2020-12-01 UTC
#>         stop_date: 2020-12-01 UTC
#>         start_time: 00:00 UTC
#>         stop_time: 00:00 UTC
#>         _CoordSysBuilder: ucar.nc2.dataset.conv.CF1Convention

# Read the coordinate vectors of the grid and their lengths
lon <- ncvar_get(nc = ncin, varid = "longitude")
nlon <- dim(lon)
head(lon)
#> [1] 9.013755 9.069310 9.124865 9.180420 9.235975 9.291530

lat <- ncvar_get(nc = ncin, varid = "latitude")
nlat <- dim(lat)
head(lat)
#> [1] 48.49170 48.52503 48.55836 48.59170 48.62503 48.65836

# Read the time axis (numeric offsets; the unit is read separately) and print it
time <- ncvar_get(nc = ncin, varid = "time")
time
#>   [1] 15721.5 15751.0 15780.5 15811.0 15841.5 15872.0 15902.5 15933.5 15964.0
#>  [10] 15994.5 16025.0 16055.5 16086.5 16116.0 16145.5 16176.0 16206.5 16237.0
#>  [19] 16267.5 16298.5 16329.0 16359.5 16390.0 16420.5 16451.5 16481.0 16510.5
#>  [28] 16541.0 16571.5 16602.0 16632.5 16663.5 16694.0 16724.5 16755.0 16785.5
#>  [37] 16816.5 16846.5 16876.5 16907.0 16937.5 16968.0 16998.5 17029.5 17060.0
#>  [46] 17090.5 17121.0 17151.5 17182.5 17212.0 17241.5 17272.0 17302.5 17333.0
#>  [55] 17363.5 17394.5 17425.0 17455.5 17486.0 17516.5 17547.5 17577.0 17606.5
#>  [64] 17637.0 17667.5 17698.0 17728.5 17759.5 17790.0 17820.5 17851.0 17881.5
#>  [73] 17912.5 17942.0 17971.5 18002.0 18032.5 18063.0 18093.5 18124.5 18155.0
#>  [82] 18185.5 18216.0 18246.5 18277.5 18307.5 18337.5 18368.0 18398.5 18429.0
#>  [91] 18459.5 18490.5 18521.0 18551.5 18582.0 18612.5 18643.5 18673.0 18702.5
#> [100] 18733.0 18763.5 18794.0 18824.5 18855.5 18886.0 18916.5 18947.0 18977.5
#> [109] 19008.5 19038.0 19067.5 19098.0 19128.5 19159.0 19189.5 19220.5 19251.0
#> [118] 19281.5 19312.0 19342.5 19373.5 19403.0 19432.5 19463.0 19493.5 19524.0
#> [127] 19554.5 19585.5 19616.0 19646.5 19677.0 19707.5 19738.5 19768.5 19798.5
#> [136] 19829.0 19859.5 19890.0 19920.5 19951.5 19982.0 20012.5 20043.0 20073.5
#> [145] 20104.5 20134.0 20163.5 20194.0 20224.5 20255.0 20285.5 20316.5 20347.0
#> [154] 20377.5 20408.0 20438.5 20469.5 20499.0 20528.5 20559.0 20589.5 20620.0
#> [163] 20650.5 20681.5 20712.0 20742.5 20773.0 20803.5 20834.5 20864.0 20893.5
#> [172] 20924.0 20954.5 20985.0 21015.5 21046.5 21077.0 21107.5 21138.0 21168.5
#> [181] 21199.5 21229.5 21259.5 21290.0 21320.5 21351.0 21381.5 21412.5 21443.0
#> [190] 21473.5 21504.0 21534.5 21565.5 21595.0 21624.5 21655.0 21685.5 21716.0
#> [199] 21746.5 21777.5 21808.0 21838.5 21869.0 21899.5 21930.5 21960.0 21989.5
#> [208] 22020.0 22050.5 22081.0 22111.5 22142.5 22173.0 22203.5 22234.0 22264.5
#> [217] 22295.5 22325.0 22354.5 22385.0 22415.5 22446.0 22476.5 22507.5 22538.0
#> [226] 22568.5 22599.0 22629.5 22660.5 22690.5 22720.5 22751.0 22781.5 22812.0
#> [235] 22842.5 22873.5 22904.0 22934.5 22965.0 22995.5 23026.5 23056.0 23085.5
#> [244] 23116.0 23146.5 23177.0 23207.5 23238.5 23269.0 23299.5 23330.0 23360.5
#> [253] 23391.5 23421.0 23450.5 23481.0 23511.5 23542.0 23572.5 23603.5 23634.0
#> [262] 23664.5 23695.0 23725.5 23756.5 23786.0 23815.5 23846.0 23876.5 23907.0
#> [271] 23937.5 23968.5 23999.0 24029.5 24060.0 24090.5 24121.5 24151.5 24181.5
#> [280] 24212.0 24242.5 24273.0 24303.5 24334.5 24365.0 24395.5 24426.0 24456.5
#> [289] 24487.5 24517.0 24546.5 24577.0 24607.5 24638.0 24668.5 24699.5 24730.0
#> [298] 24760.5 24791.0 24821.5 24852.5 24882.0 24911.5 24942.0 24972.5 25003.0
#> [307] 25033.5 25064.5 25095.0 25125.5 25156.0 25186.5 25217.5 25247.0 25276.5
#> [316] 25307.0 25337.5 25368.0 25398.5 25429.5 25460.0 25490.5 25521.0 25551.5
#> [325] 25582.5 25612.5 25642.5 25673.0 25703.5 25734.0 25764.5 25795.5 25826.0
#> [334] 25856.5 25887.0 25917.5

# Unit of the time axis and number of time steps
tunits <- ncatt_get(nc = ncin, varid = "time", attname = "units")
nt <- dim(time)
nt
#> [1] 336
tunits
#> $hasatt
#> [1] TRUE
#> 
#> $value
#> [1] "days since 1950-01-01 00:00:00"

# Read the temperature variable (bottomT) as an array, together with its
# descriptive attributes and its fill value
dname <- "bottomT"

temp_array <- ncvar_get(nc = ncin, varid = dname)
dlname <- ncatt_get(nc = ncin, varid = dname, attname = "long_name")
dunits <- ncatt_get(nc = ncin, varid = dname, attname = "units")
fillvalue <- ncatt_get(nc = ncin, varid = dname, attname = "_FillValue")
dim(temp_array)
#> [1] 383 523 336

# File-level (global) attributes; varid 0 addresses the file rather than a variable
title <- ncatt_get(nc = ncin, varid = 0, attname = "title")
institution <- ncatt_get(nc = ncin, varid = 0, attname = "institution")
datasource <- ncatt_get(nc = ncin, varid = 0, attname = "source")
references <- ncatt_get(nc = ncin, varid = 0, attname = "references")
history <- ncatt_get(nc = ncin, varid = 0, attname = "history")
Conventions <- ncatt_get(nc = ncin, varid = 0, attname = "Conventions")

# Convert time: the units string reads "days since YYYY-MM-DD hh:mm:ss", so the
# third space-separated field is the origin date; split it into year, month, day
tustr <- strsplit(tunits$value, split = " ")
tdstr <- strsplit(tustr[[1]][3], split = "-")
origin_ymd <- as.integer(tdstr[[1]])
tyear <- origin_ymd[1]
tmonth <- origin_ymd[2]
tday <- origin_ymd[3]

# Here I deviate from the guide a little bit. Save this info:
dates <- chron(time, origin = c(tmonth, tday, tyear))

# Cut month and two-digit year out of the text form of the dates
# (assumes chron's "(mm/dd/yy hh:mm:ss)" text layout -- TODO confirm)
dates_chr <- as.character(dates)
months <- as.numeric(substr(dates_chr, 2, 3))
years <- as.numeric(substr(dates_chr, 8, 9))
# Two-digit years above 90 are read as 19xx, all others as 20xx
years <- ifelse(years > 90, 1900 + years, 2000 + years)

# Replace netCDF fill values with NA's. The fill value of this variable is NaN,
# and `x == NaN` is always NA, so an equality test can never select anything;
# test for NaN explicitly instead.
temp_array[is.nan(temp_array)] <- NA

# Next, we need to work with the months that correspond to the quarters that we use.
# loop through each time step, and if it is a good month save it as a raster.
# First get the index of months that correspond to Q1 (months 1-3) and Q4 (months 10-12)
months
#>   [1]  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1
#>  [26]  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2
#>  [51]  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3
#>  [76]  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4
#> [101]  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5
#> [126]  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6
#> [151]  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7
#> [176]  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8
#> [201]  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9
#> [226] 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10
#> [251] 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11
#> [276] 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12
#> [301]  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1
#> [326]  2  3  4  5  6  7  8  9 10 11 12

# Flag the time steps that fall in Q1 (months 1-3) and Q4 (months 10-12)
in_q1 <- months < 4
in_q4 <- months > 9

# Positions of those time steps along the time dimension
index_keep_q1 <- which(in_q1)
index_keep_q4 <- which(in_q4)

# Month and year of every retained time step
months_keep_q1 <- months[index_keep_q1]
years_keep_q1 <- years[index_keep_q1]
months_keep_q4 <- months[index_keep_q4]
years_keep_q4 <- years[index_keep_q4]

# Cut the retained time steps out of the temperature array
temp_q1 <- temp_array[, , index_keep_q1]
temp_q4 <- temp_array[, , index_keep_q4]

# Now we have one array per quarter. Next, average the monthly layers within
# each year so that there is one layer per year and quarter.

# Average the layers of a 3-d array that belong to the same year.
#   arr  3-d array whose third dimension indexes monthly time steps
#   yrs  year of each layer along the third dimension of `arr`
# Returns a list of 2-d matrices named by year, in order of first appearance.
# Layers are grouped by their year instead of being taken in consecutive
# triples, so the result does not depend on list positions computed from
# i/3 arithmetic, nor on Q1 and Q4 having the same number of layers.
mean_layers_by_year <- function(arr, yrs) {
  stopifnot(length(dim(arr)) == 3L, dim(arr)[3] == length(yrs))
  yr_levels <- unique(yrs)
  out <- lapply(yr_levels, function(yr) {
    layers <- lapply(which(yrs == yr), function(k) arr[, , k])
    # Left-to-right sum, then divide: (m1 + m2 + m3) / 3 for a complete quarter.
    # A cell that is NA in any month stays NA.
    Reduce(`+`, layers) / length(layers)
  })
  names(out) <- yr_levels
  out
}

# One averaged layer per year, named with the year
dlist_q1 <- mean_layers_by_year(temp_q1, years_keep_q1)
dlist_q4 <- mean_layers_by_year(temp_q4, years_keep_q4)

# Next step: read the temperature off the yearly layers at the haul positions.
# The haul-level data frame used for this is dat_haul.

# Per quarter, keep only the hauls from years that have a temperature layer
years_with_temp_q1 <- names(dlist_q1)
d_sub_temp_q1 <- dat_haul %>%
  filter(quarter == 1) %>%
  filter(year %in% years_with_temp_q1) %>%
  droplevels()
#> filter: removed 3,779 rows (40%), 5,597 rows remaining
#> filter: no rows removed
years_with_temp_q4 <- names(dlist_q4)
d_sub_temp_q4 <- dat_haul %>%
  filter(quarter == 4) %>%
  filter(year %in% years_with_temp_q4) %>%
  droplevels()
#> filter: removed 5,709 rows (61%), 3,667 rows remaining
#> filter: no rows removed

# Per-year tables of haul positions with the extracted temperature, keyed by year
temp_data_list_q1 <- list()
temp_data_list_q4 <- list()

# Year as a factor; the per-year filter in the helper below matches on it
d_sub_temp_q1$year_f <- as.factor(d_sub_temp_q1$year)
d_sub_temp_q4$year_f <- as.factor(d_sub_temp_q4$year)

# Plot one year-quarter temperature layer and read it off at that year's hauls.
#   hauls          haul table with columns lon, lat and year_f
#   layers         list of temperature matrices named by year (dlist_q1 / dlist_q4)
#   target_year    year to process, as a character string
#   quarter_label  text added to the plot title ("Q1" or "Q4")
# Returns a table with lon, lat, temp and year (character, as before), or NULL
# when the year has no hauls or no layer, so that such a year is skipped
# instead of stopping the loop.
extract_temp_year <- function(hauls, layers, target_year, quarter_label) {
  pts <- hauls %>% filter(year_f == target_year) %>% dplyr::select(lon, lat)
  layer <- layers[[target_year]]
  if (nrow(pts) == 0 || is.null(layer)) {
    return(NULL)
  }

  # Build the raster from the transposed layer and flip it vertically.
  # `lon` and `lat` below are the grid coordinate vectors of the netCDF file.
  # NOTE(review): min/max of lon and lat are used as the raster extent; if
  # these are cell centres the grid is offset by half a cell - confirm.
  # NOTE(review): the CRS string contains "+no_defs+ towgs84" (misplaced "+");
  # it is left unchanged here - confirm the intended definition.
  r <- raster(t(layer),
              xmn = min(lon), xmx = max(lon), ymn = min(lat), ymx = max(lat),
              crs = CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs+ towgs84=0,0,0"))
  r <- flip(r, direction = "y")
  plot(r, main = paste(target_year, quarter_label))

  # Temperature at each haul position, stored next to the coordinates
  pts$temp <- raster::extract(r, SpatialPoints(pts))
  pts$year <- target_year
  pts
}

# Loop over every year present in either quarter, in ascending order. Results
# are stored under the year's name, so no fixed first-year offset is needed.
temp_years <- sort(union(levels(d_sub_temp_q1$year_f),
                         levels(d_sub_temp_q4$year_f)))

for (i in temp_years) {
  temp_data_list_q1[[i]] <- extract_temp_year(d_sub_temp_q1, dlist_q1, i, "Q1")
  temp_data_list_q4[[i]] <- extract_temp_year(d_sub_temp_q4, dlist_q4, i, "Q4")
}

#> filter: removed 5,496 rows (98%), 101 rows remaining
#> filter: removed 3,606 rows (98%), 61 rows remaining

#> filter: removed 5,422 rows (97%), 175 rows remaining
#> filter: removed 3,605 rows (98%), 62 rows remaining

#> filter: removed 5,443 rows (97%), 154 rows remaining
#> filter: removed 3,614 rows (99%), 53 rows remaining

#> filter: removed 5,414 rows (97%), 183 rows remaining
#> filter: removed 3,606 rows (98%), 61 rows remaining

#> filter: removed 5,383 rows (96%), 214 rows remaining
#> filter: removed 3,592 rows (98%), 75 rows remaining

#> filter: removed 5,369 rows (96%), 228 rows remaining
#> filter: removed 3,599 rows (98%), 68 rows remaining

#> filter: removed 5,407 rows (97%), 190 rows remaining
#> filter: removed 3,573 rows (97%), 94 rows remaining

#> filter: removed 5,487 rows (98%), 110 rows remaining
#> filter: removed 3,578 rows (98%), 89 rows remaining

#> filter: removed 5,389 rows (96%), 208 rows remaining
#> filter: removed 3,551 rows (97%), 116 rows remaining

#> filter: removed 5,452 rows (97%), 145 rows remaining
#> filter: removed 3,550 rows (97%), 117 rows remaining

#> filter: removed 5,436 rows (97%), 161 rows remaining
#> filter: removed 3,539 rows (97%), 128 rows remaining

#> filter: removed 5,390 rows (96%), 207 rows remaining
#> filter: removed 3,558 rows (97%), 109 rows remaining

#> filter: removed 5,379 rows (96%), 218 rows remaining
#> filter: removed 3,516 rows (96%), 151 rows remaining

#> filter: removed 5,409 rows (97%), 188 rows remaining
#> filter: removed 3,517 rows (96%), 150 rows remaining

#> filter: removed 5,362 rows (96%), 235 rows remaining
#> filter: removed 3,498 rows (95%), 169 rows remaining

#> filter: removed 5,375 rows (96%), 222 rows remaining
#> filter: removed 3,491 rows (95%), 176 rows remaining

#> filter: removed 5,338 rows (95%), 259 rows remaining
#> filter: removed 3,487 rows (95%), 180 rows remaining

#> filter: removed 5,356 rows (96%), 241 rows remaining
#> filter: removed 3,505 rows (96%), 162 rows remaining

#> filter: removed 5,354 rows (96%), 243 rows remaining
#> filter: removed 3,487 rows (95%), 180 rows remaining

#> filter: removed 5,373 rows (96%), 224 rows remaining
#> filter: removed 3,534 rows (96%), 133 rows remaining

#> filter: removed 5,329 rows (95%), 268 rows remaining
#> filter: removed 3,519 rows (96%), 148 rows remaining

#> filter: removed 5,389 rows (96%), 208 rows remaining
#> filter: removed 3,491 rows (95%), 176 rows remaining

#> filter: removed 5,360 rows (96%), 237 rows remaining
#> filter: removed 3,501 rows (95%), 166 rows remaining

#> filter: removed 5,365 rows (96%), 232 rows remaining
#> filter: removed 3,454 rows (94%), 213 rows remaining

#> filter: removed 5,351 rows (96%), 246 rows remaining
#> filter: removed 3,442 rows (94%), 225 rows remaining

#> filter: removed 5,341 rows (95%), 256 rows remaining
#> filter: removed 3,456 rows (94%), 211 rows remaining

#> filter: removed 5,479 rows (98%), 118 rows remaining
#> filter: removed 3,575 rows (97%), 92 rows remaining

#> filter: removed 5,471 rows (98%), 126 rows remaining
#> filter: removed 3,565 rows (97%), 102 rows remaining


# Stack the yearly tables into one data frame per quarter ...
big_dat_temp_q1 <- dplyr::bind_rows(temp_data_list_q1)
big_dat_temp_q4 <- dplyr::bind_rows(temp_data_list_q4)

# ... label each with its quarter, and combine the two quarters
temp_q1_labelled <- mutate(big_dat_temp_q1, quarter = 1)
temp_q4_labelled <- mutate(big_dat_temp_q4, quarter = 4)
big_dat_temp <- bind_rows(temp_q1_labelled, temp_q4_labelled)
#> mutate: new variable 'quarter' (double) with one unique value and 0% NA
#> mutate: new variable 'quarter' (double) with one unique value and 0% NA

Bottom salinity

# Data source:
# https://data.marine.copernicus.eu/product/BALTICSEA_REANALYSIS_PHY_003_011/download?dataset=dataset-reanalysis-nemo-monthlymeans

# Path of the salinity netCDF file; open it and show its header
sal_nc_path <- "data/NEMO_Nordic_SCOBI/dataset-reanalysis-nemo-monthlymeans_1668587452211.nc"
ncin <- nc_open(filename = sal_nc_path)

print(ncin)
#> File data/NEMO_Nordic_SCOBI/dataset-reanalysis-nemo-monthlymeans_1668587452211.nc (NC_FORMAT_CLASSIC):
#> 
#>      1 variables (excluding dimension variables):
#>         float sob[longitude,latitude,time]   
#>             long_name: Sea water salinity at sea floor
#>             missing_value: NaN
#>             standard_name: sea_water_salinity
#>             units: 0.001
#>             _FillValue: NaN
#>             _ChunkSizes: 1
#>              _ChunkSizes: 523
#>              _ChunkSizes: 383
#> 
#>      3 dimensions:
#>         time  Size:335
#>             axis: T
#>             long_name: Validity time
#>             standard_name: time
#>             units: days since 1950-01-01 00:00:00
#>             calendar: gregorian
#>             _ChunkSizes: 512
#>             _CoordinateAxisType: Time
#>             valid_min: 15751
#>             valid_max: 25917.5
#>         latitude  Size:187
#>             axis: Y
#>             standard_name: latitude
#>             long_name: latitude
#>             units: degrees_north
#>             _CoordinateAxisType: Lat
#>             valid_min: 53.1249580383301
#>             valid_max: 59.3248596191406
#>         longitude  Size:199
#>             standard_name: longitude
#>             long_name: longitude
#>             units: degrees_east
#>             axis: X
#>             _CoordinateAxisType: Lon
#>             valid_min: 11.1248445510864
#>             valid_max: 22.12473487854
#> 
#>     24 global attributes:
#>         references: http://www.smhi.se
#>         institution: Swedish Meterological and Hydrological Institute
#>         history: See source and creation_date attributees
#>         Conventions: CF-1.5
#>         contact: servicedesk_cmems@mercator-ocean.eu
#>         comment: Provided by SMHI as a Copernicus Marine Environment Monitoring Service production unit
#>         bullentin_type: reanalysis
#>         cmems_product_id: BALTICSEA_REANALYSIS_PHY_003_011
#>         title: CMEMS V4 Reanalysis: NEMO model 3D fields (monthly means)
#>         FROM_ORIGINAL_FILE__easternmost_longitude: 30.2357654571533
#>         FROM_ORIGINAL_FILE__northernmost_latitude: 65.8914184570312
#>         FROM_ORIGINAL_FILE__westernmost_longitude: 9.01375484466553
#>         FROM_ORIGINAL_FILE__southernmost_latitude: 48.49169921875
#>         shallowest_depth: 1.50136542320251
#>         deepest_depth: 711.059204101562
#>         source: SMHI reanalysis run NORDIC-NS2_1d_20201201_20201201
#>         file_quality_index: 1
#>         creation_date: 2021-11-09 UTC
#>         bullentin_date: 20201201
#>         start_date: 2020-12-01 UTC
#>         stop_date: 2020-12-01 UTC
#>         start_time: 00:00 UTC
#>         stop_time: 00:00 UTC
#>         _CoordSysBuilder: ucar.nc2.dataset.conv.CF1Convention

# Read the grid coordinates: longitude first ...
lon <- ncvar_get(nc = ncin, varid = "longitude")
nlon <- dim(lon)
print(head(lon))
#> [1] 11.12484 11.18040 11.23596 11.29151 11.34706 11.40262

# ... then latitude
lat <- ncvar_get(nc = ncin, varid = "latitude")
nlat <- dim(lat)
print(head(lat))
#> [1] 53.12496 53.15829 53.19162 53.22496 53.25829 53.29162

# Read the time axis (its units attribute is read right after) and show it
time <- ncvar_get(nc = ncin, varid = "time")
print(time)
#>   [1] 15751.0 15780.5 15811.0 15841.5 15872.0 15902.5 15933.5 15964.0 15994.5
#>  [10] 16025.0 16055.5 16086.5 16116.0 16145.5 16176.0 16206.5 16237.0 16267.5
#>  [19] 16298.5 16329.0 16359.5 16390.0 16420.5 16451.5 16481.0 16510.5 16541.0
#>  [28] 16571.5 16602.0 16632.5 16663.5 16694.0 16724.5 16755.0 16785.5 16816.5
#>  [37] 16846.5 16876.5 16907.0 16937.5 16968.0 16998.5 17029.5 17060.0 17090.5
#>  [46] 17121.0 17151.5 17182.5 17212.0 17241.5 17272.0 17302.5 17333.0 17363.5
#>  [55] 17394.5 17425.0 17455.5 17486.0 17516.5 17547.5 17577.0 17606.5 17637.0
#>  [64] 17667.5 17698.0 17728.5 17759.5 17790.0 17820.5 17851.0 17881.5 17912.5
#>  [73] 17942.0 17971.5 18002.0 18032.5 18063.0 18093.5 18124.5 18155.0 18185.5
#>  [82] 18216.0 18246.5 18277.5 18307.5 18337.5 18368.0 18398.5 18429.0 18459.5
#>  [91] 18490.5 18521.0 18551.5 18582.0 18612.5 18643.5 18673.0 18702.5 18733.0
#> [100] 18763.5 18794.0 18824.5 18855.5 18886.0 18916.5 18947.0 18977.5 19008.5
#> [109] 19038.0 19067.5 19098.0 19128.5 19159.0 19189.5 19220.5 19251.0 19281.5
#> [118] 19312.0 19342.5 19373.5 19403.0 19432.5 19463.0 19493.5 19524.0 19554.5
#> [127] 19585.5 19616.0 19646.5 19677.0 19707.5 19738.5 19768.5 19798.5 19829.0
#> [136] 19859.5 19890.0 19920.5 19951.5 19982.0 20012.5 20043.0 20073.5 20104.5
#> [145] 20134.0 20163.5 20194.0 20224.5 20255.0 20285.5 20316.5 20347.0 20377.5
#> [154] 20408.0 20438.5 20469.5 20499.0 20528.5 20559.0 20589.5 20620.0 20650.5
#> [163] 20681.5 20712.0 20742.5 20773.0 20803.5 20834.5 20864.0 20893.5 20924.0
#> [172] 20954.5 20985.0 21015.5 21046.5 21077.0 21107.5 21138.0 21168.5 21199.5
#> [181] 21229.5 21259.5 21290.0 21320.5 21351.0 21381.5 21412.5 21443.0 21473.5
#> [190] 21504.0 21534.5 21565.5 21595.0 21624.5 21655.0 21685.5 21716.0 21746.5
#> [199] 21777.5 21808.0 21838.5 21869.0 21899.5 21930.5 21960.0 21989.5 22020.0
#> [208] 22050.5 22081.0 22111.5 22142.5 22173.0 22203.5 22234.0 22264.5 22295.5
#> [217] 22325.0 22354.5 22385.0 22415.5 22446.0 22476.5 22507.5 22538.0 22568.5
#> [226] 22599.0 22629.5 22660.5 22690.5 22720.5 22751.0 22781.5 22812.0 22842.5
#> [235] 22873.5 22904.0 22934.5 22965.0 22995.5 23026.5 23056.0 23085.5 23116.0
#> [244] 23146.5 23177.0 23207.5 23238.5 23269.0 23299.5 23330.0 23360.5 23391.5
#> [253] 23421.0 23450.5 23481.0 23511.5 23542.0 23572.5 23603.5 23634.0 23664.5
#> [262] 23695.0 23725.5 23756.5 23786.0 23815.5 23846.0 23876.5 23907.0 23937.5
#> [271] 23968.5 23999.0 24029.5 24060.0 24090.5 24121.5 24151.5 24181.5 24212.0
#> [280] 24242.5 24273.0 24303.5 24334.5 24365.0 24395.5 24426.0 24456.5 24487.5
#> [289] 24517.0 24546.5 24577.0 24607.5 24638.0 24668.5 24699.5 24730.0 24760.5
#> [298] 24791.0 24821.5 24852.5 24882.0 24911.5 24942.0 24972.5 25003.0 25033.5
#> [307] 25064.5 25095.0 25125.5 25156.0 25186.5 25217.5 25247.0 25276.5 25307.0
#> [316] 25337.5 25368.0 25398.5 25429.5 25460.0 25490.5 25521.0 25551.5 25582.5
#> [325] 25612.5 25642.5 25673.0 25703.5 25734.0 25764.5 25795.5 25826.0 25856.5
#> [334] 25887.0 25917.5

# Length of the time dimension, and the attribute that states the time origin
nt <- dim(time)
tunits <- ncatt_get(nc = ncin, varid = "time", attname = "units")
print(nt)
#> [1] 335
print(tunits)
#> $hasatt
#> [1] TRUE
#> 
#> $value
#> [1] "days since 1950-01-01 00:00:00"

# Read the bottom salinity variable together with its descriptive attributes
dname <- "sob"

sal_array <- ncvar_get(nc = ncin, varid = dname)
fillvalue <- ncatt_get(nc = ncin, varid = dname, attname = "_FillValue")
dunits <- ncatt_get(nc = ncin, varid = dname, attname = "units")
dlname <- ncatt_get(nc = ncin, varid = dname, attname = "long_name")
print(dim(sal_array))
#> [1] 199 187 335

# Get global attributes (varid = 0 requests file-level attributes)
title <- ncatt_get(ncin, 0, "title")
institution <- ncatt_get(ncin, 0, "institution")
datasource <- ncatt_get(ncin, 0, "source")
references <- ncatt_get(ncin, 0, "references")
history <- ncatt_get(ncin, 0, "history")
Conventions <- ncatt_get(ncin, 0, "Conventions")

# Everything needed from the salinity file is now held in R objects, and the
# rest of the script does not read from `ncin` again: release the file handle.
nc_close(ncin)

# Convert time: split the time units string ("days since YYYY-MM-DD hh:mm:ss")
# into fields; the third space-separated field is the origin date
tustr <- strsplit(tunits$value, " ")
tdstr <- strsplit(unlist(tustr)[3], "-")
tmonth <- as.integer(unlist(tdstr)[2])
tday <- as.integer(unlist(tdstr)[3])
tyear <- as.integer(unlist(tdstr)[1])

# Here I deviate from the guide a little bit. Save this info:
dates <- chron(time, origin = c(tmonth, tday, tyear))

# Month and year of every time step.
# These are computed with base Date arithmetic rather than by cutting fixed
# character positions out of the printed chron value: the printed form depends
# on chron's output format (and on whether a time-of-day part is shown), and
# two-digit years needed a century guess.
origin_date <- as.Date(sprintf("%04d-%02d-%02d", tyear, tmonth, tday))
calendar_dates <- origin_date + floor(as.numeric(time))
months <- as.numeric(format(calendar_dates, "%m"))
years <- as.numeric(format(calendar_dates, "%Y"))

# Replace netCDF fill values with NA's.
# The header printed above lists `_FillValue: NaN` for `sob`; a NaN can never
# be matched with `==`, so the replacement is only attempted when the file
# declares a fill value that is an ordinary number. ncatt_get() reports a
# missing attribute through `hasatt`.
if (isTRUE(fillvalue$hasatt) && !is.na(fillvalue$value)) {
  # which() ignores cells that are already NA
  sal_array[which(sal_array == fillvalue$value)] <- NA
}

# Only the months belonging to the quarters we use (Q1 and Q4) are needed.
# Show the month of every time step; the positions of the Q1 and Q4 months are
# picked out from this vector below.
print(months)
#>   [1]  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2
#>  [26]  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3
#>  [51]  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4
#>  [76]  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5
#> [101]  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6
#> [126]  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7
#> [151]  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8
#> [176]  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9
#> [201] 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10
#> [226] 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11
#> [251] 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12
#> [276]  1  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1
#> [301]  2  3  4  5  6  7  8  9 10 11 12  1  2  3  4  5  6  7  8  9 10 11 12  1  2
#> [326]  3  4  5  6  7  8  9 10 11 12

# Flag the time steps that fall in Q1 (months 1-3) and Q4 (months 10-12)
in_q1 <- months < 4
in_q4 <- months > 9

# Positions of those time steps along the time dimension
index_keep_q1 <- which(in_q1)
index_keep_q4 <- which(in_q4)

# Month and year of every retained time step
months_keep_q1 <- months[index_keep_q1]
years_keep_q1 <- years[index_keep_q1]
months_keep_q4 <- months[index_keep_q4]
years_keep_q4 <- years[index_keep_q4]

# Cut the retained time steps out of the salinity array
sal_q1 <- sal_array[, , index_keep_q1]
sal_q4 <- sal_array[, , index_keep_q4]

# Now we have one array per quarter. Next, average the monthly layers within
# each year so that there is one layer per year and quarter.

# Average the layers of a 3-d array that belong to the same year.
#   arr  3-d array whose third dimension indexes monthly time steps
#   yrs  year of each layer along the third dimension of `arr`
# Returns a list of 2-d matrices named by year, in order of first appearance.
# Layers are grouped by their year instead of being taken in consecutive
# triples, so the result does not depend on list positions computed from
# i/3 arithmetic, nor on Q1 and Q4 having the same number of layers.
mean_layers_by_year <- function(arr, yrs) {
  stopifnot(length(dim(arr)) == 3L, dim(arr)[3] == length(yrs))
  yr_levels <- unique(yrs)
  out <- lapply(yr_levels, function(yr) {
    layers <- lapply(which(yrs == yr), function(k) arr[, , k])
    # Left-to-right sum, then divide: (m1 + m2 + m3) / 3 for a complete quarter.
    # A cell that is NA in any month stays NA.
    Reduce(`+`, layers) / length(layers)
  })
  names(out) <- yr_levels
  out
}

dim(sal_q1)
#> [1] 199 187  83
dim(sal_q4)
#> [1] 199 187  84

# Hmm, we didn't get the first month in the salinity series (it starts in
# month 2). As before, the first available layer stands in for the missing
# leading month so that the first year has three Q1 layers. The number of
# stand-in layers is derived from the first Q1 month instead of being
# hard-coded, and the padding is skipped if it has already been applied.
# NOTE(review): this counts the first available month twice in the first
# year's Q1 mean - confirm that this weighting is intended.
n_missing_q1 <- months_keep_q1[1] - 1
years_pad_q1 <- c(rep(years_keep_q1[1], n_missing_q1), years_keep_q1)
if (n_missing_q1 > 0 && dim(sal_q1)[3] == length(years_keep_q1)) {
  sal_q1 <- sal_q1[, , c(rep(1, n_missing_q1), seq_len(dim(sal_q1)[3]))]
}

dim(sal_q1)
#> [1] 199 187  84

# One averaged layer per year, named with the year
dlist_q1 <- mean_layers_by_year(sal_q1, years_pad_q1)
dlist_q4 <- mean_layers_by_year(sal_q4, years_keep_q4)

# Next step: read the salinity off the yearly layers at the haul positions.
# The haul-level data frame used for this is dat_haul.

# Per quarter, keep only the hauls from years that have a salinity layer
years_with_sal_q1 <- names(dlist_q1)
d_sub_sal_q1 <- dat_haul %>%
  filter(quarter == 1) %>%
  filter(year %in% years_with_sal_q1) %>%
  droplevels()
#> filter: removed 3,779 rows (40%), 5,597 rows remaining
#> filter: no rows removed
years_with_sal_q4 <- names(dlist_q4)
d_sub_sal_q4 <- dat_haul %>%
  filter(quarter == 4) %>%
  filter(year %in% years_with_sal_q4) %>%
  droplevels()
#> filter: removed 5,709 rows (61%), 3,667 rows remaining
#> filter: no rows removed

# Per-year tables of haul positions with the extracted salinity, keyed by year
sal_data_list_q1 <- list()
sal_data_list_q4 <- list()

# Year as a factor; the per-year filter in the helper below matches on it
d_sub_sal_q1$year_f <- as.factor(d_sub_sal_q1$year)
d_sub_sal_q4$year_f <- as.factor(d_sub_sal_q4$year)

# Plot one year-quarter salinity layer and read it off at that year's hauls.
#   hauls          haul table with columns lon, lat and year_f
#   layers         list of salinity matrices named by year (dlist_q1 / dlist_q4)
#   target_year    year to process, as a character string
#   quarter_label  text added to the plot title ("Q1" or "Q4")
# Returns a table with lon, lat, sal and year (character, as before), or NULL
# when the year has no hauls or no layer, so that such a year is skipped
# instead of stopping the loop.
extract_sal_year <- function(hauls, layers, target_year, quarter_label) {
  pts <- hauls %>% filter(year_f == target_year) %>% dplyr::select(lon, lat)
  layer <- layers[[target_year]]
  if (nrow(pts) == 0 || is.null(layer)) {
    return(NULL)
  }

  # Build the raster from the transposed layer and flip it vertically.
  # `lon` and `lat` below are the grid coordinate vectors of the netCDF file.
  # NOTE(review): min/max of lon and lat are used as the raster extent; if
  # these are cell centres the grid is offset by half a cell - confirm.
  # NOTE(review): the CRS string contains "+no_defs+ towgs84" (misplaced "+");
  # it is left unchanged here - confirm the intended definition.
  r <- raster(t(layer),
              xmn = min(lon), xmx = max(lon), ymn = min(lat), ymx = max(lat),
              crs = CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs+ towgs84=0,0,0"))
  r <- flip(r, direction = "y")
  plot(r, main = paste(target_year, quarter_label))

  # Salinity at each haul position, stored next to the coordinates
  pts$sal <- raster::extract(r, SpatialPoints(pts))
  pts$year <- target_year
  pts
}

# Loop over every year present in either quarter, in ascending order. Results
# are stored under the year's name, so no fixed first-year offset is needed.
sal_years <- sort(union(levels(d_sub_sal_q1$year_f),
                        levels(d_sub_sal_q4$year_f)))

for (i in sal_years) {
  sal_data_list_q1[[i]] <- extract_sal_year(d_sub_sal_q1, dlist_q1, i, "Q1")
  sal_data_list_q4[[i]] <- extract_sal_year(d_sub_sal_q4, dlist_q4, i, "Q4")
}

#> filter: removed 5,496 rows (98%), 101 rows remaining
#> filter: removed 3,606 rows (98%), 61 rows remaining

#> filter: removed 5,422 rows (97%), 175 rows remaining
#> filter: removed 3,605 rows (98%), 62 rows remaining

#> filter: removed 5,443 rows (97%), 154 rows remaining
#> filter: removed 3,614 rows (99%), 53 rows remaining

#> filter: removed 5,414 rows (97%), 183 rows remaining
#> filter: removed 3,606 rows (98%), 61 rows remaining

#> filter: removed 5,383 rows (96%), 214 rows remaining
#> filter: removed 3,592 rows (98%), 75 rows remaining

#> filter: removed 5,369 rows (96%), 228 rows remaining
#> filter: removed 3,599 rows (98%), 68 rows remaining

#> filter: removed 5,407 rows (97%), 190 rows remaining
#> filter: removed 3,573 rows (97%), 94 rows remaining

#> filter: removed 5,487 rows (98%), 110 rows remaining
#> filter: removed 3,578 rows (98%), 89 rows remaining

#> filter: removed 5,389 rows (96%), 208 rows remaining
#> filter: removed 3,551 rows (97%), 116 rows remaining

#> filter: removed 5,452 rows (97%), 145 rows remaining
#> filter: removed 3,550 rows (97%), 117 rows remaining

#> filter: removed 5,436 rows (97%), 161 rows remaining
#> filter: removed 3,539 rows (97%), 128 rows remaining

#> filter: removed 5,390 rows (96%), 207 rows remaining
#> filter: removed 3,558 rows (97%), 109 rows remaining

#> filter: removed 5,379 rows (96%), 218 rows remaining
#> filter: removed 3,516 rows (96%), 151 rows remaining

#> filter: removed 5,409 rows (97%), 188 rows remaining
#> filter: removed 3,517 rows (96%), 150 rows remaining

#> filter: removed 5,362 rows (96%), 235 rows remaining
#> filter: removed 3,498 rows (95%), 169 rows remaining

#> filter: removed 5,375 rows (96%), 222 rows remaining
#> filter: removed 3,491 rows (95%), 176 rows remaining

#> filter: removed 5,338 rows (95%), 259 rows remaining
#> filter: removed 3,487 rows (95%), 180 rows remaining

#> filter: removed 5,356 rows (96%), 241 rows remaining
#> filter: removed 3,505 rows (96%), 162 rows remaining

#> filter: removed 5,354 rows (96%), 243 rows remaining
#> filter: removed 3,487 rows (95%), 180 rows remaining

#> filter: removed 5,373 rows (96%), 224 rows remaining
#> filter: removed 3,534 rows (96%), 133 rows remaining

#> filter: removed 5,329 rows (95%), 268 rows remaining
#> filter: removed 3,519 rows (96%), 148 rows remaining

#> filter: removed 5,389 rows (96%), 208 rows remaining
#> filter: removed 3,491 rows (95%), 176 rows remaining

#> filter: removed 5,360 rows (96%), 237 rows remaining
#> filter: removed 3,501 rows (95%), 166 rows remaining

#> filter: removed 5,365 rows (96%), 232 rows remaining
#> filter: removed 3,454 rows (94%), 213 rows remaining

#> filter: removed 5,351 rows (96%), 246 rows remaining
#> filter: removed 3,442 rows (94%), 225 rows remaining

#> filter: removed 5,341 rows (95%), 256 rows remaining
#> filter: removed 3,456 rows (94%), 211 rows remaining

#> filter: removed 5,479 rows (98%), 118 rows remaining
#> filter: removed 3,575 rows (97%), 92 rows remaining

#> filter: removed 5,471 rows (98%), 126 rows remaining
#> filter: removed 3,565 rows (97%), 102 rows remaining


# Stack the yearly tables into one data frame per quarter ...
big_dat_sal_q1 <- dplyr::bind_rows(sal_data_list_q1)
big_dat_sal_q4 <- dplyr::bind_rows(sal_data_list_q4)

# ... label each with its quarter, and combine the two quarters
sal_q1_labelled <- mutate(big_dat_sal_q1, quarter = 1)
sal_q4_labelled <- mutate(big_dat_sal_q4, quarter = 4)
big_dat_sal <- bind_rows(sal_q1_labelled, sal_q4_labelled)
#> mutate: new variable 'quarter' (double) with one unique value and 0% NA
#> mutate: new variable 'quarter' (double) with one unique value and 0% NA
# Combine oxygen, temperature, salinity and depth into one table keyed by
# `id_env` (year.quarter.lon.lat), then attach it to the catch data.
#
# `id_env` is not unique in the haul-level tables (several hauls share the same
# year, quarter and position). Joining on it as-is multiplies those rows at
# every join, and finally duplicates catch records in dat_full. Each lookup
# table is therefore reduced to one row per key before it is joined, and the
# row count of the catch data is checked after the final join.

# Add the join key to a table that has year, quarter, lon and lat
add_id_env <- function(d) {
  mutate(d, id_env = paste(year, quarter, lon, lat, sep = "."))
}

# Keep the first row for every key
one_row_per_id <- function(d) {
  distinct(d, id_env, .keep_all = TRUE)
}

# temp and sal are extracted at (lon, lat) from the raster of that year and
# quarter, so rows sharing a key carry the same value.
# NOTE(review): presumably the same holds for oxy (extracted earlier in the
# script) - confirm.
env_dat <- big_dat_oxy %>%
  add_id_env() %>%
  one_row_per_id()

temp_by_id <- big_dat_temp %>%
  add_id_env() %>%
  dplyr::select(temp, id_env) %>%
  one_row_per_id()

sal_by_id <- big_dat_sal %>%
  add_id_env() %>%
  dplyr::select(sal, id_env) %>%
  one_row_per_id()

# NOTE(review): hauls sharing a key may have different depths; the first one is
# kept here. Joining on a haul identifier would avoid this - confirm which
# identifier is available in dat_haul and dat.
depth_by_id <- dat_haul %>%
  add_id_env() %>%
  dplyr::select(depth, id_env) %>%
  one_row_per_id()

env_dat <- left_join(env_dat, temp_by_id, by = "id_env")
env_dat <- left_join(env_dat, sal_by_id, by = "id_env")
env_dat <- left_join(env_dat, depth_by_id, by = "id_env")
stopifnot(anyDuplicated(env_dat$id_env) == 0L)

# Now join these data with the full_dat
n_rows_dat <- nrow(dat)
dat_full <- left_join(dat %>% add_id_env(),
                      env_dat %>% dplyr::select(id_env, oxy, temp, sal, depth),
                      by = "id_env")

# The environmental lookup must not add or drop catch records
stopifnot(nrow(dat_full) == n_rows_dat)

Add UTM coords

# Add UTM coordinates (zone 33) computed from the haul longitude and latitude

utm_coords <- LongLatToUTM(dat_full[["lon"]], dat_full[["lat"]], zone = 33)
#> Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj =
#> prefer_proj): Discarded datum Unknown based on WGS84 ellipsoid in CRS definition

# Store the coordinates divided by 1000 (for computational reasons)
dat_full[["X"]] <- utm_coords[["X"]] / 1000
dat_full[["Y"]] <- utm_coords[["Y"]] / 1000

Save data

# Drop the helper/id columns that are not needed in the saved file, then
# standardise the remaining column names
dat_full_trimmed <- dplyr::select(dat_full, -IDx, -id_env, -haul.id.size)
dat_full_save <- janitor::clean_names(dat_full_trimmed)

# Write the cleaned catch-by-length data, without row names
write.csv(dat_full_save,
          file = "data/clean/catch_by_length_q1_q4.csv",
          row.names = FALSE)

Read and summarize in a new script